1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 static int x86_builtin_vectorization_cost (bool);
56
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
59 #endif
60
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
67 : 4)
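/* For illustration: the five-entry mult/divide arrays in the cost tables below
   are indexed by this macro, so MODE_INDEX (SImode) == 2 selects the SImode
   entry and any mode other than QI/HI/SI/DImode falls into the "other" slot at
   index 4; a lookup then looks roughly like
   ix86_cost->mult_init[MODE_INDEX (mode)] (treat the exact field name and
   expression as a sketch rather than a quotation of the code below).  */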
68
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
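/* Worked example: with COSTS_N_INSNS (N) assumed to be (N)*4, an add costs
   COSTS_N_INSNS (1) == 4 in the speed-oriented tables and COSTS_N_BYTES (2) == 4
   in the size table below, so a 2-byte add carries the same weight when tuning
   for size as a one-insn add does when tuning for speed.  */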
72
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
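/* Reading of the initializers below (a sketch, not authoritative): each
   stringop_algs initializer names the algorithm to use when the block size is
   unknown, followed by {max_size, algorithm} pairs for known sizes, with -1 as
   the catch-all.  DUMMY_STRINGOP_ALGS (always libcall) appears to fill the
   memcpy/memset slot for the pointer width a given CPU never runs in, e.g. the
   64-bit slot in the 386/486 tables and the 32-bit slot in generic64_cost.  */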
74
75 static const
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
94 0, /* "large" insn */
95 2, /* MOVE_RATIO */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
121 2, /* Branch cost */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
143 };
144
145 /* Processor costs (relative to an add) */
146 static const
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
192 1, /* Branch cost */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
215
216 static const
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
236 3, /* MOVE_RATIO */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
264 1, /* Branch cost */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
286 };
287
288 static const
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
308 6, /* MOVE_RATIO */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 2, /* Branch cost */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
356 };
357
358 static const
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
378 6, /* MOVE_RATIO */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
404 2, /* Branch cost */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
412 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
413 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
414 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
415 */
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
433 };
434
435 static const
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
455 4, /* MOVE_RATIO */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
466
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
482 1, /* Branch cost */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
504 };
505
506 static const
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
555 1, /* Branch cost */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
577 };
578
579 static const
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 9, /* MOVE_RATIO */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
625 5, /* Branch cost */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
633 than K8 does. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
672 9, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set the number of simultaneous prefetches
699 to a large constant to reflect this (it is probably not a good idea to leave
700 the number of prefetches completely unlimited, as their execution also takes
701 some time). */
702 100, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has an optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use a loop. For large blocks, a libcall can do
712 nontemporal accesses and beat inline code considerably. */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
729 };
730
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
750 9, /* MOVE_RATIO */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
772 /* On K8
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
775 On AMDFAM10
776 MOVD reg64, xmmreg Double FADD 3
777 1/1 1/1
778 MOVD reg32, xmmreg Double FADD 3
779 1/1 1/1 */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (it is probably not a good idea to leave
786 the number of prefetches completely unlimited, as their execution also takes
787 some time). */
788 100, /* number of parallel prefetches */
789 5, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use a loop. For large blocks, a libcall can
799 do nontemporal accesses and beat inline code considerably. */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
816 };
817
818 static const
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
838 6, /* MOVE_RATIO */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
864 2, /* Branch cost */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
874 {-1, libcall}}},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
887 };
888
889 static const
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
909 17, /* MOVE_RATIO */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
935 1, /* Branch cost */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
946 {-1, libcall}}},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
960 };
961
962 static const
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
982 16, /* MOVE_RATIO */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1035 static const
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1039 that cost, however, our current implementation of synth_mult results in
1040 the use of unnecessary temporary registers, causing regressions on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1086 is increased to the perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1109 };
1110
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1112 static const
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1180 };
1181
1182 const struct processor_costs *ix86_cost = &pentium_cost;
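/* The pentium_cost initializer above is only a startup default; option
   handling re-points ix86_cost at the table matching the selected -mtune
   processor (presumably in override_options; treat the exact function name as
   an assumption rather than something this excerpt shows).  */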
1183
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1192
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1201
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1204
1205 /* Generic instruction choice should be common subset of supported CPUs
1206 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
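/* Sketch of how these masks are consulted (macro shape modeled on i386.h of
   this era; the names are assumptions rather than guaranteed verbatim): each
   entry of ix86_tune_features[] below is a mask of the processors for which a
   tuning applies, and a flag test expands to roughly

     (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)) != 0

   where ix86_tune identifies the processor selected by -mtune.  */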
1208
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling it for Generic64 seems like a good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro based chips. */
1215 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1216
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1220
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1222 m_486 | m_PENT,
1223
1224 /* X86_TUNE_USE_BIT_TEST */
1225 m_386,
1226
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1232
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to the P4 based
1234 on simulation results, but once the P4 was made, no performance benefit
1235 was observed from branch hints; they also increase code size.
1236 As a result, icc never generates branch hints. */
1237 0,
1238
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1240 ~m_386,
1241
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1245
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1250
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on the Generic32 compilation setting as well. However,
1253 in the current implementation partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and conflicts with the partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1259 m_PPRO,
1260
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1263
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1266
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1269
1270 /* X86_TUNE_USE_MOV0 */
1271 m_K6,
1272
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1277 m_PENT4,
1278
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1280 m_PPRO,
1281
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1283 ~m_PENT,
1284
1285 /* X86_TUNE_READ_MODIFY */
1286 ~(m_PENT | m_PPRO),
1287
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1291
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1294
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1297
1298 /* X86_TUNE_QIMODE_MATH */
1299 ~0,
1300
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls were more effective. */
1305 ~m_PPRO,
1306
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1308 0,
1309
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1311 m_PPRO,
1312
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1315
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1331
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here between PPro/Pentium4 based chips that treat 128bit
1337 SSE registers as single units versus K8 based chips that divide SSE
1338 registers into two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 show that disabling this option on P4 brings over 20% SPECfp regression,
1342 while enabling it on K8 brings roughly 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1345
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1347 m_AMDFAM10,
1348
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just the lower part of scalar values in the proper format, leaving the
1352 upper part undefined. */
1353 m_ATHLON_K8,
1354
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_AMD_MULTIPLE,
1357
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1360
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1363
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1366
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_SHIFT1 */
1371 ~m_486,
1372
1373 /* X86_TUNE_USE_FFREEP */
1374 m_AMD_MULTIPLE,
1375
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE | m_GENERIC),
1378
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1380 ~(m_AMDFAM10),
1381
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1385
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1388
1389 /* X86_TUNE_USE_BT */
1390 m_AMD_MULTIPLE,
1391
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1394
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1400
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1402 ~m_K8,
1403
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1405 m_K8 | m_GENERIC64,
1406
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1409 ~(m_386 | m_486),
1410
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1414
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1416 machines. */
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1420 than a MOV. */
1421 m_PENT,
1422
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1425 m_PENT,
1426
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1430 m_K6,
1431
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1434 m_AMDFAM10,
1435 };
1436
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1441
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1443 ~m_386,
1444
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1446 ~(m_386 | m_486),
1447
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1449 ~m_386,
1450
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1452 ~m_386,
1453 };
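
/* Note: each entry above is a mask of processors for which the feature
   holds.  override_options later reduces every entry to a per-CPU boolean
   by ANDing it with the single bit of the selected CPU:

     ix86_arch_mask = 1u << ix86_arch;
     for (i = 0; i < X86_ARCH_LAST; ++i)
       ix86_arch_features[i] &= ix86_arch_mask;

   so subsequent tests of an entry are effectively 0/1 feature checks.  */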
1454
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1457
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1461
1462 static enum stringop_alg stringop_alg = no_stringop;
1463
1464 /* In case the average insn count for a single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1466 epilogue code. */
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1468
1469 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1473
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1476
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1478 {
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1483 /* FP registers */
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1486 /* arg pointer */
1487 NON_Q_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1490 /* SSE registers */
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1492 SSE_REGS, SSE_REGS,
1493 /* MMX registers */
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1495 MMX_REGS, MMX_REGS,
1496 /* REX registers */
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1501 SSE_REGS, SSE_REGS,
1502 };
1503
1504 /* The "default" register map used in 32bit mode. */
1505
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1507 {
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1515 };
1516
1517 static int const x86_64_int_parameter_registers[6] =
1518 {
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1521 };
1522
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1524 {
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1527 };
1528
1529 static int const x86_64_int_return_registers[4] =
1530 {
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1532 };
1533
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1536 {
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1544 };
1545
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1590 numbers.
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1599 */
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1601 {
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1609 };
1610
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1613
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1617
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
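
/* For reference: with the usual 64-bit values (REGPARM_MAX == 6,
   UNITS_PER_WORD == 8, SSE_REGPARM_MAX == 8) this works out to
   6 * 8 + 8 * 16 == 176 bytes, the standard x86-64 varargs register
   save area.  */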
1620
1621 /* Define the structure for the machine field in struct function. */
1622
1623 struct stack_local_entry GTY(())
1624 {
1625 unsigned short mode;
1626 unsigned short n;
1627 rtx rtl;
1628 struct stack_local_entry *next;
1629 };
1630
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1633
1634 [arguments]
1635 <- ARG_POINTER
1636 saved pc
1637
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1640 [saved regs]
1641
1642 [padding1] \
1643 )
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1646 [frame] (
1647 )
1648 [padding2] /
1649 */
1650 struct ix86_frame
1651 {
1652 int nregs;
1653 int padding1;
1654 int va_arg_size;
1655 HOST_WIDE_INT frame;
1656 int padding2;
1657 int outgoing_arguments_size;
1658 int red_zone_size;
1659
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1665
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1669 };
1670
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1673 /* Asm dialect. */
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1675 /* TLS dialects. */
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1677
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1680
1681 /* Which cpu we are scheduling for. */
1682 enum processor_type ix86_tune;
1683
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1686
1687 /* True if the sse prefetch instruction is not a NOOP. */
1688 int x86_prefetch_sse;
1689
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1692
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1696
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1699
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1702
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1705
1706 int ix86_section_threshold = 65536;
1707
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1711
1712 /* Fence to use after loop using movnt. */
1713 tree x86_mfence;
1714
1715 /* Register class used for passing a given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1718 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
1719
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1723 {
1724 X86_64_NO_CLASS,
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1727 X86_64_SSE_CLASS,
1728 X86_64_SSESF_CLASS,
1729 X86_64_SSEDF_CLASS,
1730 X86_64_SSEUP_CLASS,
1731 X86_64_X87_CLASS,
1732 X86_64_X87UP_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1734 X86_64_MEMORY_CLASS
1735 };
1736 static const char * const x86_64_reg_class_name[] =
1737 {
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1740 };
1741
1742 #define MAX_CLASSES 4
1743
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
1747
1748 \f
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1754 rtx, rtx, int);
1755
1756 \f
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1758 in memory. */
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1761 #endif
1762
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1765
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1769
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1772
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1776
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1788
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1794
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1796
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
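
/* Sketch of how the UNSET masks chain (macro expansion only, nothing is
   compiled here): disabling one ISA level also disables everything that
   depends on it.  For instance, expanding the definitions above,

     OPTION_MASK_ISA_SSE2_UNSET
       == OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
        | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2
        | OPTION_MASK_ISA_SSE4A

   so the single "ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET" in
   ix86_handle_option below turns off SSE3 and every later SSE level when
   -mno-sse2 is given; the handler only needs to clear the dependent bits.  */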
1799
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1803
1804 /* Implement TARGET_HANDLE_OPTION. */
1805
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1808 {
1809 switch (code)
1810 {
1811 case OPT_mmmx:
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1813 if (!value)
1814 {
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1817 }
1818 return true;
1819
1820 case OPT_m3dnow:
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1822 if (!value)
1823 {
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1826 }
1827 return true;
1828
1829 case OPT_m3dnowa:
1830 return false;
1831
1832 case OPT_msse:
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1834 if (!value)
1835 {
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1838 }
1839 return true;
1840
1841 case OPT_msse2:
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1843 if (!value)
1844 {
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1847 }
1848 return true;
1849
1850 case OPT_msse3:
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1852 if (!value)
1853 {
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1856 }
1857 return true;
1858
1859 case OPT_mssse3:
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1861 if (!value)
1862 {
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1865 }
1866 return true;
1867
1868 case OPT_msse4_1:
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1870 if (!value)
1871 {
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1874 }
1875 return true;
1876
1877 case OPT_msse4_2:
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1879 if (!value)
1880 {
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1883 }
1884 return true;
1885
1886 case OPT_msse4:
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1889 return true;
1890
1891 case OPT_mno_sse4:
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1894 return true;
1895
1896 case OPT_msse4a:
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1898 if (!value)
1899 {
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1902 }
1903 return true;
1904
1905 case OPT_msse5:
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1907 if (!value)
1908 {
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1911 }
1912 return true;
1913
1914 default:
1915 return true;
1916 }
1917 }
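
/* Note on the pattern above: each case records its bits in
   ix86_isa_flags_explicit whether the option is being enabled or disabled.
   override_options later consults that mask when applying -march defaults
   (e.g. it only ORs in OPTION_MASK_ISA_SSE2 for a CPU with PTA_SSE2 when
   OPTION_MASK_ISA_SSE2 is not set in ix86_isa_flags_explicit), so an
   explicit -mno-sse2 on the command line is never overridden by the
   defaults of the selected CPU.  */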
1918
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1923 been parsed.
1924
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1927
1928 void
1929 override_options (void)
1930 {
1931 int i;
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1935
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1938
1939 static struct ptt
1940 {
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1947 }
1948 const processor_target_table[PROCESSOR_max] =
1949 {
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
1964 };
1965
1966 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1967 enum pta_flags
1968 {
1969 PTA_SSE = 1 << 0,
1970 PTA_SSE2 = 1 << 1,
1971 PTA_SSE3 = 1 << 2,
1972 PTA_MMX = 1 << 3,
1973 PTA_PREFETCH_SSE = 1 << 4,
1974 PTA_3DNOW = 1 << 5,
1975 PTA_3DNOW_A = 1 << 6,
1976 PTA_64BIT = 1 << 7,
1977 PTA_SSSE3 = 1 << 8,
1978 PTA_CX16 = 1 << 9,
1979 PTA_POPCNT = 1 << 10,
1980 PTA_ABM = 1 << 11,
1981 PTA_SSE4A = 1 << 12,
1982 PTA_NO_SAHF = 1 << 13,
1983 PTA_SSE4_1 = 1 << 14,
1984 PTA_SSE4_2 = 1 << 15,
1985 PTA_SSE5 = 1 << 16
1986 };
1987
1988 static struct pta
1989 {
1990 const char *const name; /* processor name or nickname. */
1991 const enum processor_type processor;
1992 const unsigned /*enum pta_flags*/ flags;
1993 }
1994 const processor_alias_table[] =
1995 {
1996 {"i386", PROCESSOR_I386, 0},
1997 {"i486", PROCESSOR_I486, 0},
1998 {"i586", PROCESSOR_PENTIUM, 0},
1999 {"pentium", PROCESSOR_PENTIUM, 0},
2000 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2001 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2002 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2003 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2004 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2005 {"i686", PROCESSOR_PENTIUMPRO, 0},
2006 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2007 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2008 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2009 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2010 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2011 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2012 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2013 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2014 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2015 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2016 | PTA_CX16 | PTA_NO_SAHF)},
2017 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2018 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2019 | PTA_SSSE3
2020 | PTA_CX16)},
2021 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2022 | PTA_PREFETCH_SSE)},
2023 {"k6", PROCESSOR_K6, PTA_MMX},
2024 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2025 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2026 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2027 | PTA_PREFETCH_SSE)},
2028 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2029 | PTA_PREFETCH_SSE)},
2030 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2031 | PTA_SSE)},
2032 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2033 | PTA_SSE)},
2034 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2035 | PTA_SSE)},
2036 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2037 | PTA_MMX | PTA_SSE | PTA_SSE2
2038 | PTA_NO_SAHF)},
2039 {"k8", PROCESSOR_K8, (PTA_64BIT
2040 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2041 | PTA_SSE | PTA_SSE2
2042 | PTA_NO_SAHF)},
2043 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2044 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2045 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2046 | PTA_NO_SAHF)},
2047 {"opteron", PROCESSOR_K8, (PTA_64BIT
2048 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2049 | PTA_SSE | PTA_SSE2
2050 | PTA_NO_SAHF)},
2051 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2052 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2053 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2054 | PTA_NO_SAHF)},
2055 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2056 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2057 | PTA_SSE | PTA_SSE2
2058 | PTA_NO_SAHF)},
2059 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2060 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2061 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2062 | PTA_NO_SAHF)},
2063 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2064 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2065 | PTA_SSE | PTA_SSE2
2066 | PTA_NO_SAHF)},
2067 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2068 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2069 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2070 | PTA_SSE4A
2071 | PTA_CX16 | PTA_ABM)},
2072 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2073 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2074 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2075 | PTA_SSE4A
2076 | PTA_CX16 | PTA_ABM)},
2077 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2078 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2079 };
2080
2081 int const pta_size = ARRAY_SIZE (processor_alias_table);
2082
2083 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2084 SUBTARGET_OVERRIDE_OPTIONS;
2085 #endif
2086
2087 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2088 SUBSUBTARGET_OVERRIDE_OPTIONS;
2089 #endif
2090
2091 /* -fPIC is the default for x86_64. */
2092 if (TARGET_MACHO && TARGET_64BIT)
2093 flag_pic = 2;
2094
2095 /* Set the default values for switches whose default depends on TARGET_64BIT
2096 in case they weren't overwritten by command line options. */
2097 if (TARGET_64BIT)
2098 {
2099 /* Mach-O doesn't support omitting the frame pointer for now. */
2100 if (flag_omit_frame_pointer == 2)
2101 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2102 if (flag_asynchronous_unwind_tables == 2)
2103 flag_asynchronous_unwind_tables = 1;
2104 if (flag_pcc_struct_return == 2)
2105 flag_pcc_struct_return = 0;
2106 }
2107 else
2108 {
2109 if (flag_omit_frame_pointer == 2)
2110 flag_omit_frame_pointer = 0;
2111 if (flag_asynchronous_unwind_tables == 2)
2112 flag_asynchronous_unwind_tables = 0;
2113 if (flag_pcc_struct_return == 2)
2114 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2115 }
2116
2117 /* Need to check -mtune=generic first. */
2118 if (ix86_tune_string)
2119 {
2120 if (!strcmp (ix86_tune_string, "generic")
2121 || !strcmp (ix86_tune_string, "i686")
2122 /* As special support for cross compilers we read -mtune=native
2123 as -mtune=generic. With native compilers we won't see the
2124 -mtune=native, as it was changed by the driver. */
2125 || !strcmp (ix86_tune_string, "native"))
2126 {
2127 if (TARGET_64BIT)
2128 ix86_tune_string = "generic64";
2129 else
2130 ix86_tune_string = "generic32";
2131 }
2132 else if (!strncmp (ix86_tune_string, "generic", 7))
2133 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2134 }
2135 else
2136 {
2137 if (ix86_arch_string)
2138 ix86_tune_string = ix86_arch_string;
2139 if (!ix86_tune_string)
2140 {
2141 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2142 ix86_tune_defaulted = 1;
2143 }
2144
2145 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2146 need to use a sensible tune option. */
2147 if (!strcmp (ix86_tune_string, "generic")
2148 || !strcmp (ix86_tune_string, "x86-64")
2149 || !strcmp (ix86_tune_string, "i686"))
2150 {
2151 if (TARGET_64BIT)
2152 ix86_tune_string = "generic64";
2153 else
2154 ix86_tune_string = "generic32";
2155 }
2156 }
2157 if (ix86_stringop_string)
2158 {
2159 if (!strcmp (ix86_stringop_string, "rep_byte"))
2160 stringop_alg = rep_prefix_1_byte;
2161 else if (!strcmp (ix86_stringop_string, "libcall"))
2162 stringop_alg = libcall;
2163 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2164 stringop_alg = rep_prefix_4_byte;
2165 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2166 stringop_alg = rep_prefix_8_byte;
2167 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2168 stringop_alg = loop_1_byte;
2169 else if (!strcmp (ix86_stringop_string, "loop"))
2170 stringop_alg = loop;
2171 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2172 stringop_alg = unrolled_loop;
2173 else
2174 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2175 }
2176 if (!strcmp (ix86_tune_string, "x86-64"))
2177 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2178 "-mtune=generic instead as appropriate.");
2179
2180 if (!ix86_arch_string)
2181 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2182 else
2183 ix86_arch_specified = 1;
2184
2185 if (!strcmp (ix86_arch_string, "generic"))
2186 error ("generic CPU can be used only for -mtune= switch");
2187 if (!strncmp (ix86_arch_string, "generic", 7))
2188 error ("bad value (%s) for -march= switch", ix86_arch_string);
2189
2190 if (ix86_cmodel_string != 0)
2191 {
2192 if (!strcmp (ix86_cmodel_string, "small"))
2193 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2194 else if (!strcmp (ix86_cmodel_string, "medium"))
2195 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2196 else if (!strcmp (ix86_cmodel_string, "large"))
2197 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2198 else if (flag_pic)
2199 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2200 else if (!strcmp (ix86_cmodel_string, "32"))
2201 ix86_cmodel = CM_32;
2202 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2203 ix86_cmodel = CM_KERNEL;
2204 else
2205 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2206 }
2207 else
2208 {
2209 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2210 use of rip-relative addressing. This eliminates fixups that
2211 would otherwise be needed if this object is to be placed in a
2212 DLL, and is essentially just as efficient as direct addressing. */
2213 if (TARGET_64BIT_MS_ABI)
2214 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2215 else if (TARGET_64BIT)
2216 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2217 else
2218 ix86_cmodel = CM_32;
2219 }
2220 if (ix86_asm_string != 0)
2221 {
2222 if (! TARGET_MACHO
2223 && !strcmp (ix86_asm_string, "intel"))
2224 ix86_asm_dialect = ASM_INTEL;
2225 else if (!strcmp (ix86_asm_string, "att"))
2226 ix86_asm_dialect = ASM_ATT;
2227 else
2228 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2229 }
2230 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2231 error ("code model %qs not supported in the %s bit mode",
2232 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2233 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2234 sorry ("%i-bit mode not compiled in",
2235 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2236
2237 for (i = 0; i < pta_size; i++)
2238 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2239 {
2240 ix86_arch = processor_alias_table[i].processor;
2241 /* Default cpu tuning to the architecture. */
2242 ix86_tune = ix86_arch;
2243
2244 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2245 error ("CPU you selected does not support x86-64 "
2246 "instruction set");
2247
2248 if (processor_alias_table[i].flags & PTA_MMX
2249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2250 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2251 if (processor_alias_table[i].flags & PTA_3DNOW
2252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2253 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2254 if (processor_alias_table[i].flags & PTA_3DNOW_A
2255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2256 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2257 if (processor_alias_table[i].flags & PTA_SSE
2258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2259 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2260 if (processor_alias_table[i].flags & PTA_SSE2
2261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2262 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2263 if (processor_alias_table[i].flags & PTA_SSE3
2264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2265 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2266 if (processor_alias_table[i].flags & PTA_SSSE3
2267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2268 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2269 if (processor_alias_table[i].flags & PTA_SSE4_1
2270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2271 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2272 if (processor_alias_table[i].flags & PTA_SSE4_2
2273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2274 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2275 if (processor_alias_table[i].flags & PTA_SSE4A
2276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2277 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2278 if (processor_alias_table[i].flags & PTA_SSE5
2279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2280 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2281
2282 if (processor_alias_table[i].flags & PTA_ABM)
2283 x86_abm = true;
2284 if (processor_alias_table[i].flags & PTA_CX16)
2285 x86_cmpxchg16b = true;
2286 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2287 x86_popcnt = true;
2288 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2289 x86_prefetch_sse = true;
2290 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2291 x86_sahf = true;
2292
2293 break;
2294 }
2295
2296 if (i == pta_size)
2297 error ("bad value (%s) for -march= switch", ix86_arch_string);
2298
2299 ix86_arch_mask = 1u << ix86_arch;
2300 for (i = 0; i < X86_ARCH_LAST; ++i)
2301 ix86_arch_features[i] &= ix86_arch_mask;
2302
2303 for (i = 0; i < pta_size; i++)
2304 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2305 {
2306 ix86_tune = processor_alias_table[i].processor;
2307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2308 {
2309 if (ix86_tune_defaulted)
2310 {
2311 ix86_tune_string = "x86-64";
2312 for (i = 0; i < pta_size; i++)
2313 if (! strcmp (ix86_tune_string,
2314 processor_alias_table[i].name))
2315 break;
2316 ix86_tune = processor_alias_table[i].processor;
2317 }
2318 else
2319 error ("CPU you selected does not support x86-64 "
2320 "instruction set");
2321 }
2322 /* Intel CPUs have always interpreted SSE prefetch instructions as
2323 NOPs; so, we can enable SSE prefetch instructions even when
2324 -mtune (rather than -march) points us to a processor that has them.
2325 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2326 higher processors. */
2327 if (TARGET_CMOVE
2328 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2329 x86_prefetch_sse = true;
2330 break;
2331 }
2332 if (i == pta_size)
2333 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2334
2335 ix86_tune_mask = 1u << ix86_tune;
2336 for (i = 0; i < X86_TUNE_LAST; ++i)
2337 ix86_tune_features[i] &= ix86_tune_mask;
2338
2339 if (optimize_size)
2340 ix86_cost = &size_cost;
2341 else
2342 ix86_cost = processor_target_table[ix86_tune].cost;
2343
2344 /* Arrange to set up i386_stack_locals for all functions. */
2345 init_machine_status = ix86_init_machine_status;
2346
2347 /* Validate -mregparm= value. */
2348 if (ix86_regparm_string)
2349 {
2350 if (TARGET_64BIT)
2351 warning (0, "-mregparm is ignored in 64-bit mode");
2352 i = atoi (ix86_regparm_string);
2353 if (i < 0 || i > REGPARM_MAX)
2354 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2355 else
2356 ix86_regparm = i;
2357 }
2358 if (TARGET_64BIT)
2359 ix86_regparm = REGPARM_MAX;
2360
2361 /* If the user has provided any of the -malign-* options,
2362 warn and use that value only if -falign-* is not set.
2363 Remove this code in GCC 3.2 or later. */
2364 if (ix86_align_loops_string)
2365 {
2366 warning (0, "-malign-loops is obsolete, use -falign-loops");
2367 if (align_loops == 0)
2368 {
2369 i = atoi (ix86_align_loops_string);
2370 if (i < 0 || i > MAX_CODE_ALIGN)
2371 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2372 else
2373 align_loops = 1 << i;
2374 }
2375 }
2376
2377 if (ix86_align_jumps_string)
2378 {
2379 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2380 if (align_jumps == 0)
2381 {
2382 i = atoi (ix86_align_jumps_string);
2383 if (i < 0 || i > MAX_CODE_ALIGN)
2384 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2385 else
2386 align_jumps = 1 << i;
2387 }
2388 }
2389
2390 if (ix86_align_funcs_string)
2391 {
2392 warning (0, "-malign-functions is obsolete, use -falign-functions");
2393 if (align_functions == 0)
2394 {
2395 i = atoi (ix86_align_funcs_string);
2396 if (i < 0 || i > MAX_CODE_ALIGN)
2397 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2398 else
2399 align_functions = 1 << i;
2400 }
2401 }
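
/* Note: the argument to these obsolete -malign-* options is a log2
   exponent rather than a byte count; e.g. -malign-loops=4 corresponds to
   -falign-loops=16, hence the "align_... = 1 << i" above.  */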
2402
2403 /* Default align_* from the processor table. */
2404 if (align_loops == 0)
2405 {
2406 align_loops = processor_target_table[ix86_tune].align_loop;
2407 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2408 }
2409 if (align_jumps == 0)
2410 {
2411 align_jumps = processor_target_table[ix86_tune].align_jump;
2412 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2413 }
2414 if (align_functions == 0)
2415 {
2416 align_functions = processor_target_table[ix86_tune].align_func;
2417 }
2418
2419 /* Validate -mbranch-cost= value, or provide default. */
2420 ix86_branch_cost = ix86_cost->branch_cost;
2421 if (ix86_branch_cost_string)
2422 {
2423 i = atoi (ix86_branch_cost_string);
2424 if (i < 0 || i > 5)
2425 error ("-mbranch-cost=%d is not between 0 and 5", i);
2426 else
2427 ix86_branch_cost = i;
2428 }
2429 if (ix86_section_threshold_string)
2430 {
2431 i = atoi (ix86_section_threshold_string);
2432 if (i < 0)
2433 error ("-mlarge-data-threshold=%d is negative", i);
2434 else
2435 ix86_section_threshold = i;
2436 }
2437
2438 if (ix86_tls_dialect_string)
2439 {
2440 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2441 ix86_tls_dialect = TLS_DIALECT_GNU;
2442 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2443 ix86_tls_dialect = TLS_DIALECT_GNU2;
2444 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2445 ix86_tls_dialect = TLS_DIALECT_SUN;
2446 else
2447 error ("bad value (%s) for -mtls-dialect= switch",
2448 ix86_tls_dialect_string);
2449 }
2450
2451 if (ix87_precision_string)
2452 {
2453 i = atoi (ix87_precision_string);
2454 if (i != 32 && i != 64 && i != 80)
2455 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
2456 }
2457
2458 if (TARGET_64BIT)
2459 {
2460 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2461
2462 /* Enable by default the SSE and MMX builtins. Do allow the user to
2463 explicitly disable any of these. In particular, disabling SSE and
2464 MMX for kernel code is extremely useful. */
2465 if (!ix86_arch_specified)
2466 ix86_isa_flags
2467 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2468 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2469
2470 if (TARGET_RTD)
2471 warning (0, "-mrtd is ignored in 64bit mode");
2472 }
2473 else
2474 {
2475 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2476
2477 if (!ix86_arch_specified)
2478 ix86_isa_flags
2479 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2480
2481 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2482 when the programmer takes care to keep the stack from being destroyed. */
2483 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2484 target_flags |= MASK_NO_RED_ZONE;
2485 }
2486
2487 /* Keep nonleaf frame pointers. */
2488 if (flag_omit_frame_pointer)
2489 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2490 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2491 flag_omit_frame_pointer = 1;
2492
2493 /* If we're doing fast math, we don't care about comparison order
2494 wrt NaNs. This lets us use a shorter comparison sequence. */
2495 if (flag_finite_math_only)
2496 target_flags &= ~MASK_IEEE_FP;
2497
2498 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2499 since the insns won't need emulation. */
2500 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2501 target_flags &= ~MASK_NO_FANCY_MATH_387;
2502
2503 /* Likewise, if the target doesn't have a 387, or we've specified
2504 software floating point, don't use 387 inline intrinsics. */
2505 if (!TARGET_80387)
2506 target_flags |= MASK_NO_FANCY_MATH_387;
2507
2508 /* Turn on SSE4A builtins for -msse5. */
2509 if (TARGET_SSE5)
2510 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2511
2512 /* Turn on SSE4.1 builtins for -msse4.2. */
2513 if (TARGET_SSE4_2)
2514 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2515
2516 /* Turn on SSSE3 builtins for -msse4.1. */
2517 if (TARGET_SSE4_1)
2518 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2519
2520 /* Turn on SSE3 builtins for -mssse3. */
2521 if (TARGET_SSSE3)
2522 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2523
2524 /* Turn on SSE3 builtins for -msse4a. */
2525 if (TARGET_SSE4A)
2526 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2527
2528 /* Turn on SSE2 builtins for -msse3. */
2529 if (TARGET_SSE3)
2530 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2531
2532 /* Turn on SSE builtins for -msse2. */
2533 if (TARGET_SSE2)
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2535
2536 /* Turn on MMX builtins for -msse. */
2537 if (TARGET_SSE)
2538 {
2539 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2540 x86_prefetch_sse = true;
2541 }
2542
2543 /* Turn on MMX builtins for 3Dnow. */
2544 if (TARGET_3DNOW)
2545 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2546
2547 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2548 if (TARGET_SSE4_2 || TARGET_ABM)
2549 x86_popcnt = true;
2550
2551 /* Validate -mpreferred-stack-boundary= value, or provide default.
2552 The default of 128 bits is for Pentium III's SSE __m128. We can't
2553 change it because of optimize_size. Otherwise, we can't mix object
2554 files compiled with -Os and -On. */
2555 ix86_preferred_stack_boundary = 128;
2556 if (ix86_preferred_stack_boundary_string)
2557 {
2558 i = atoi (ix86_preferred_stack_boundary_string);
2559 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2560 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2561 TARGET_64BIT ? 4 : 2);
2562 else
2563 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2564 }
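
/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the 16-byte
   alignment needed for the SSE __m128 type mentioned above.  */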
2565
2566 /* Accept -msseregparm only if at least SSE support is enabled. */
2567 if (TARGET_SSEREGPARM
2568 && ! TARGET_SSE)
2569 error ("-msseregparm used without SSE enabled");
2570
2571 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2572 if (ix86_fpmath_string != 0)
2573 {
2574 if (! strcmp (ix86_fpmath_string, "387"))
2575 ix86_fpmath = FPMATH_387;
2576 else if (! strcmp (ix86_fpmath_string, "sse"))
2577 {
2578 if (!TARGET_SSE)
2579 {
2580 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2581 ix86_fpmath = FPMATH_387;
2582 }
2583 else
2584 ix86_fpmath = FPMATH_SSE;
2585 }
2586 else if (! strcmp (ix86_fpmath_string, "387,sse")
2587 || ! strcmp (ix86_fpmath_string, "sse,387"))
2588 {
2589 if (!TARGET_SSE)
2590 {
2591 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2592 ix86_fpmath = FPMATH_387;
2593 }
2594 else if (!TARGET_80387)
2595 {
2596 warning (0, "387 instruction set disabled, using SSE arithmetics");
2597 ix86_fpmath = FPMATH_SSE;
2598 }
2599 else
2600 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2601 }
2602 else
2603 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2604 }
2605
2606 /* If the i387 is disabled, then do not return values in it. */
2607 if (!TARGET_80387)
2608 target_flags &= ~MASK_FLOAT_RETURNS;
2609
2610 /* Use external vectorized library in vectorizing intrinsics. */
2611 if (ix86_veclibabi_string)
2612 {
2613 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2614 ix86_veclib_handler = ix86_veclibabi_acml;
2615 else
2616 error ("unknown vectorization library ABI type (%s) for "
2617 "-mveclibabi= switch", ix86_veclibabi_string);
2618 }
2619
2620 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2621 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2622 && !optimize_size)
2623 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2624
2625 /* ??? Unwind info is not correct around the CFG unless either a frame
2626 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2627 unwind info generation to be aware of the CFG and propagating states
2628 around edges. */
2629 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2630 || flag_exceptions || flag_non_call_exceptions)
2631 && flag_omit_frame_pointer
2632 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2633 {
2634 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2635 warning (0, "unwind tables currently require either a frame pointer "
2636 "or -maccumulate-outgoing-args for correctness");
2637 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2638 }
2639
2640 /* For sane SSE instruction set generation we need the fcomi instruction.
2641 It is safe to enable all CMOVE instructions. */
2642 if (TARGET_SSE)
2643 TARGET_CMOVE = 1;
2644
2645 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2646 {
2647 char *p;
2648 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2649 p = strchr (internal_label_prefix, 'X');
2650 internal_label_prefix_len = p - internal_label_prefix;
2651 *p = '\0';
2652 }
2653
2654 /* When the scheduling description is not available, disable the scheduler pass
2655 so it won't slow down the compilation and make x87 code slower. */
2656 if (!TARGET_SCHEDULE)
2657 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2658
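/* Seed the prefetch and cache related --param values from the per-CPU
   cost tables, but only when the user has not set them explicitly on the
   command line (PARAM_SET_P guards each one).  */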
2659 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2660 set_param_value ("simultaneous-prefetches",
2661 ix86_cost->simultaneous_prefetches);
2662 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2663 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2664 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2665 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2666 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2667 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2668 }
2669 \f
2670 /* Return true if this goes in large data/bss. */
2671
2672 static bool
2673 ix86_in_large_data_p (tree exp)
2674 {
2675 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2676 return false;
2677
2678 /* Functions are never large data. */
2679 if (TREE_CODE (exp) == FUNCTION_DECL)
2680 return false;
2681
2682 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2683 {
2684 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2685 if (strcmp (section, ".ldata") == 0
2686 || strcmp (section, ".lbss") == 0)
2687 return true;
2688 return false;
2689 }
2690 else
2691 {
2692 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2693
2694 /* If this is an incomplete type with size 0, then we can't put it
2695 in data because it might be too big when completed. */
2696 if (!size || size > ix86_section_threshold)
2697 return true;
2698 }
2699
2700 return false;
2701 }
2702
2703 /* Switch to the appropriate section for output of DECL.
2704 DECL is either a `VAR_DECL' node or a constant of some sort.
2705 RELOC indicates whether forming the initial value of DECL requires
2706 link-time relocations. */
2707
2708 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2709 ATTRIBUTE_UNUSED;
2710
2711 static section *
2712 x86_64_elf_select_section (tree decl, int reloc,
2713 unsigned HOST_WIDE_INT align)
2714 {
2715 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2716 && ix86_in_large_data_p (decl))
2717 {
2718 const char *sname = NULL;
2719 unsigned int flags = SECTION_WRITE;
2720 switch (categorize_decl_for_section (decl, reloc))
2721 {
2722 case SECCAT_DATA:
2723 sname = ".ldata";
2724 break;
2725 case SECCAT_DATA_REL:
2726 sname = ".ldata.rel";
2727 break;
2728 case SECCAT_DATA_REL_LOCAL:
2729 sname = ".ldata.rel.local";
2730 break;
2731 case SECCAT_DATA_REL_RO:
2732 sname = ".ldata.rel.ro";
2733 break;
2734 case SECCAT_DATA_REL_RO_LOCAL:
2735 sname = ".ldata.rel.ro.local";
2736 break;
2737 case SECCAT_BSS:
2738 sname = ".lbss";
2739 flags |= SECTION_BSS;
2740 break;
2741 case SECCAT_RODATA:
2742 case SECCAT_RODATA_MERGE_STR:
2743 case SECCAT_RODATA_MERGE_STR_INIT:
2744 case SECCAT_RODATA_MERGE_CONST:
2745 sname = ".lrodata";
2746 flags = 0;
2747 break;
2748 case SECCAT_SRODATA:
2749 case SECCAT_SDATA:
2750 case SECCAT_SBSS:
2751 gcc_unreachable ();
2752 case SECCAT_TEXT:
2753 case SECCAT_TDATA:
2754 case SECCAT_TBSS:
2755 /* We don't split these for the medium model. Place them into
2756 default sections and hope for the best. */
2757 break;
2758 }
2759 if (sname)
2760 {
2761 /* We might get called with string constants, but get_named_section
2762 doesn't like them as they are not DECLs. Also, we need to set
2763 flags in that case. */
2764 if (!DECL_P (decl))
2765 return get_section (sname, flags, NULL);
2766 return get_named_section (decl, sname, reloc);
2767 }
2768 }
2769 return default_elf_select_section (decl, reloc, align);
2770 }
2771
2772 /* Build up a unique section name, expressed as a
2773 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2774 RELOC indicates whether the initial value of EXP requires
2775 link-time relocations. */
2776
2777 static void ATTRIBUTE_UNUSED
2778 x86_64_elf_unique_section (tree decl, int reloc)
2779 {
2780 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2781 && ix86_in_large_data_p (decl))
2782 {
2783 const char *prefix = NULL;
2784 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2785 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2786
2787 switch (categorize_decl_for_section (decl, reloc))
2788 {
2789 case SECCAT_DATA:
2790 case SECCAT_DATA_REL:
2791 case SECCAT_DATA_REL_LOCAL:
2792 case SECCAT_DATA_REL_RO:
2793 case SECCAT_DATA_REL_RO_LOCAL:
2794 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2795 break;
2796 case SECCAT_BSS:
2797 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2798 break;
2799 case SECCAT_RODATA:
2800 case SECCAT_RODATA_MERGE_STR:
2801 case SECCAT_RODATA_MERGE_STR_INIT:
2802 case SECCAT_RODATA_MERGE_CONST:
2803 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2804 break;
2805 case SECCAT_SRODATA:
2806 case SECCAT_SDATA:
2807 case SECCAT_SBSS:
2808 gcc_unreachable ();
2809 case SECCAT_TEXT:
2810 case SECCAT_TDATA:
2811 case SECCAT_TBSS:
2812 /* We don't split these for the medium model. Place them into
2813 default sections and hope for the best. */
2814 break;
2815 }
2816 if (prefix)
2817 {
2818 const char *name;
2819 size_t nlen, plen;
2820 char *string;
2821 plen = strlen (prefix);
2822
2823 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2824 name = targetm.strip_name_encoding (name);
2825 nlen = strlen (name);
2826
2827 string = (char *) alloca (nlen + plen + 1);
2828 memcpy (string, prefix, plen);
2829 memcpy (string + plen, name, nlen + 1);
2830
2831 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2832 return;
2833 }
2834 }
2835 default_unique_section (decl, reloc);
2836 }
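
/* Illustrative example (names hypothetical): in the medium code model a
   variable "big_table" larger than ix86_section_threshold would get the
   section name ".ldata.big_table" from the code above, or
   ".gnu.linkonce.lb.big_table" for a one-only BSS object, i.e. the chosen
   prefix followed by the decl's stripped assembler name.  */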
2837
2838 #ifdef COMMON_ASM_OP
2839 /* This says how to output assembler code to declare an
2840 uninitialized external linkage data object.
2841
2842 For medium model x86-64 we need to use the .largecomm directive for
2843 large objects. */
2844 void
2845 x86_elf_aligned_common (FILE *file,
2846 const char *name, unsigned HOST_WIDE_INT size,
2847 int align)
2848 {
2849 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2850 && size > (unsigned int)ix86_section_threshold)
2851 fprintf (file, ".largecomm\t");
2852 else
2853 fprintf (file, "%s", COMMON_ASM_OP);
2854 assemble_name (file, name);
2855 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2856 size, align / BITS_PER_UNIT);
2857 }
2858 #endif
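
/* Example of the emitted directive (values hypothetical): a 100000-byte
   common object "buf" with 256-bit alignment in the medium code model
   would be announced as

     .largecomm buf,100000,32

   (the alignment is printed in bytes); smaller objects fall back to the
   usual COMMON_ASM_OP form.  */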
2859
2860 /* Utility function for targets to use in implementing
2861 ASM_OUTPUT_ALIGNED_BSS. */
2862
2863 void
2864 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2865 const char *name, unsigned HOST_WIDE_INT size,
2866 int align)
2867 {
2868 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2869 && size > (unsigned int)ix86_section_threshold)
2870 switch_to_section (get_named_section (decl, ".lbss", 0));
2871 else
2872 switch_to_section (bss_section);
2873 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2874 #ifdef ASM_DECLARE_OBJECT_NAME
2875 last_assemble_variable_decl = decl;
2876 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2877 #else
2878 /* Standard thing is to just output a label for the object. */
2879 ASM_OUTPUT_LABEL (file, name);
2880 #endif /* ASM_DECLARE_OBJECT_NAME */
2881 ASM_OUTPUT_SKIP (file, size ? size : 1);
2882 }
2883 \f
2884 void
2885 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2886 {
2887 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2888 make the problem with not enough registers even worse. */
2889 #ifdef INSN_SCHEDULING
2890 if (level > 1)
2891 flag_schedule_insns = 0;
2892 #endif
2893
2894 if (TARGET_MACHO)
2895 /* The Darwin libraries never set errno, so we might as well
2896 avoid calling them when that's the only reason we would. */
2897 flag_errno_math = 0;
2898
2899 /* The default values of these switches depend on TARGET_64BIT,
2900 which is not known at this moment. Mark these values with 2 and
2901 let the user override them. If there is no command line option
2902 specifying them, we will set the defaults in override_options. */
2903 if (optimize >= 1)
2904 flag_omit_frame_pointer = 2;
2905 flag_pcc_struct_return = 2;
2906 flag_asynchronous_unwind_tables = 2;
2907 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2908 SUBTARGET_OPTIMIZATION_OPTIONS;
2909 #endif
2910 }
2911 \f
2912 /* Decide whether we can make a sibling call to a function. DECL is the
2913 declaration of the function being targeted by the call and EXP is the
2914 CALL_EXPR representing the call. */
2915
2916 static bool
2917 ix86_function_ok_for_sibcall (tree decl, tree exp)
2918 {
2919 tree func;
2920 rtx a, b;
2921
2922 /* If we are generating position-independent code, we cannot sibcall
2923 optimize any indirect call, or a direct call to a global function,
2924 as the PLT requires %ebx be live. */
2925 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2926 return false;
2927
2928 if (decl)
2929 func = decl;
2930 else
2931 {
2932 func = TREE_TYPE (CALL_EXPR_FN (exp));
2933 if (POINTER_TYPE_P (func))
2934 func = TREE_TYPE (func);
2935 }
2936
2937 /* Check that the return value locations are the same. For example,
2938 if we are returning floats on the 80387 register stack, we cannot
2939 make a sibcall from a function that doesn't return a float to a
2940 function that does or, conversely, from a function that does return
2941 a float to a function that doesn't; the necessary stack adjustment
2942 would not be executed. This is also the place we notice
2943 differences in the return value ABI. Note that it is ok for one
2944 of the functions to have void return type as long as the return
2945 value of the other is passed in a register. */
2946 a = ix86_function_value (TREE_TYPE (exp), func, false);
2947 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2948 cfun->decl, false);
2949 if (STACK_REG_P (a) || STACK_REG_P (b))
2950 {
2951 if (!rtx_equal_p (a, b))
2952 return false;
2953 }
2954 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2955 ;
2956 else if (!rtx_equal_p (a, b))
2957 return false;
2958
2959 /* If this call is indirect, we'll need to be able to use a call-clobbered
2960 register for the address of the target function. Make sure that all
2961 such registers are not used for passing parameters. */
2962 if (!decl && !TARGET_64BIT)
2963 {
2964 tree type;
2965
2966 /* We're looking at the CALL_EXPR, we need the type of the function. */
2967 type = CALL_EXPR_FN (exp); /* pointer expression */
2968 type = TREE_TYPE (type); /* pointer type */
2969 type = TREE_TYPE (type); /* function type */
2970
2971 if (ix86_function_regparm (type, NULL) >= 3)
2972 {
2973 /* ??? Need to count the actual number of registers to be used,
2974 not the possible number of registers. Fix later. */
2975 return false;
2976 }
2977 }
2978
2979 /* Dllimport'd functions are also called indirectly. */
2980 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2981 && decl && DECL_DLLIMPORT_P (decl)
2982 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2983 return false;
2984
2985 /* If we force-aligned the stack, then sibcalling would unalign the
2986 stack, which may break the called function. */
2987 if (cfun->machine->force_align_arg_pointer)
2988 return false;
2989
2990 /* Otherwise okay. That also includes certain types of indirect calls. */
2991 return true;
2992 }
2993
2994 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2995 calling convention attributes;
2996 arguments as in struct attribute_spec.handler. */
2997
2998 static tree
2999 ix86_handle_cconv_attribute (tree *node, tree name,
3000 tree args,
3001 int flags ATTRIBUTE_UNUSED,
3002 bool *no_add_attrs)
3003 {
3004 if (TREE_CODE (*node) != FUNCTION_TYPE
3005 && TREE_CODE (*node) != METHOD_TYPE
3006 && TREE_CODE (*node) != FIELD_DECL
3007 && TREE_CODE (*node) != TYPE_DECL)
3008 {
3009 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3010 IDENTIFIER_POINTER (name));
3011 *no_add_attrs = true;
3012 return NULL_TREE;
3013 }
3014
3015 /* Can combine regparm with all attributes but fastcall. */
3016 if (is_attribute_p ("regparm", name))
3017 {
3018 tree cst;
3019
3020 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3021 {
3022 error ("fastcall and regparm attributes are not compatible");
3023 }
3024
3025 cst = TREE_VALUE (args);
3026 if (TREE_CODE (cst) != INTEGER_CST)
3027 {
3028 warning (OPT_Wattributes,
3029 "%qs attribute requires an integer constant argument",
3030 IDENTIFIER_POINTER (name));
3031 *no_add_attrs = true;
3032 }
3033 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3034 {
3035 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3036 IDENTIFIER_POINTER (name), REGPARM_MAX);
3037 *no_add_attrs = true;
3038 }
3039
3040 if (!TARGET_64BIT
3041 && lookup_attribute (ix86_force_align_arg_pointer_string,
3042 TYPE_ATTRIBUTES (*node))
3043 && compare_tree_int (cst, REGPARM_MAX-1))
3044 {
3045 error ("%s functions limited to %d register parameters",
3046 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3047 }
3048
3049 return NULL_TREE;
3050 }
3051
3052 if (TARGET_64BIT)
3053 {
3054 /* Do not warn when emulating the MS ABI. */
3055 if (!TARGET_64BIT_MS_ABI)
3056 warning (OPT_Wattributes, "%qs attribute ignored",
3057 IDENTIFIER_POINTER (name));
3058 *no_add_attrs = true;
3059 return NULL_TREE;
3060 }
3061
3062 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3063 if (is_attribute_p ("fastcall", name))
3064 {
3065 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3066 {
3067 error ("fastcall and cdecl attributes are not compatible");
3068 }
3069 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3070 {
3071 error ("fastcall and stdcall attributes are not compatible");
3072 }
3073 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3074 {
3075 error ("fastcall and regparm attributes are not compatible");
3076 }
3077 }
3078
3079 /* Can combine stdcall with fastcall (redundant), regparm and
3080 sseregparm. */
3081 else if (is_attribute_p ("stdcall", name))
3082 {
3083 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3084 {
3085 error ("stdcall and cdecl attributes are not compatible");
3086 }
3087 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3088 {
3089 error ("stdcall and fastcall attributes are not compatible");
3090 }
3091 }
3092
3093 /* Can combine cdecl with regparm and sseregparm. */
3094 else if (is_attribute_p ("cdecl", name))
3095 {
3096 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3097 {
3098 error ("stdcall and cdecl attributes are not compatible");
3099 }
3100 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3101 {
3102 error ("fastcall and cdecl attributes are not compatible");
3103 }
3104 }
3105
3106 /* Can combine sseregparm with all attributes. */
3107
3108 return NULL_TREE;
3109 }
3110
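/* Illustrative sketch, not part of GCC itself: how the calling convention
   attributes handled above appear in 32-bit x86 user code.  The function
   names below are hypothetical.  */
#if 0
int __attribute__ ((regparm (3))) add3 (int a, int b, int c);  /* args in EAX, EDX, ECX */
int __attribute__ ((fastcall)) shl2 (int a, int b);            /* args in ECX, EDX */
int __attribute__ ((stdcall)) puts_s (const char *s);          /* callee pops its args */
double __attribute__ ((sseregparm)) scale (double x);          /* FP args in SSE regs */

/* Rejected by the handler above:
   "fastcall and regparm attributes are not compatible".  */
int __attribute__ ((fastcall, regparm (2))) bad (int a, int b);
#endif
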
3111 /* Return 0 if the attributes for two types are incompatible, 1 if they
3112 are compatible, and 2 if they are nearly compatible (which causes a
3113 warning to be generated). */
3114
3115 static int
3116 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3117 {
3118 /* Check for mismatch of non-default calling convention. */
3119 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3120
3121 if (TREE_CODE (type1) != FUNCTION_TYPE)
3122 return 1;
3123
3124 /* Check for mismatched fastcall/regparm types. */
3125 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3126 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3127 || (ix86_function_regparm (type1, NULL)
3128 != ix86_function_regparm (type2, NULL)))
3129 return 0;
3130
3131 /* Check for mismatched sseregparm types. */
3132 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3133 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3134 return 0;
3135
3136 /* Check for mismatched return types (cdecl vs stdcall). */
3137 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3138 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3139 return 0;
3140
3141 return 1;
3142 }
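
/* Sketch of the effect of the hook above (hypothetical user code): the two
   function types below yield a return value of 0, so assigning the fastcall
   function to a default-convention pointer is diagnosed as incompatible.  */
#if 0
extern int __attribute__ ((fastcall)) fc (int);
int (*plain_ptr) (int) = fc;
#endif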
3143 \f
3144 /* Return the regparm value for a function with the indicated TYPE and DECL.
3145 DECL may be NULL when calling function indirectly
3146 or considering a libcall. */
3147
3148 static int
3149 ix86_function_regparm (const_tree type, const_tree decl)
3150 {
3151 tree attr;
3152 int regparm = ix86_regparm;
3153
3154 if (TARGET_64BIT)
3155 return regparm;
3156
3157 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3158 if (attr)
3159 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3160
3161 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3162 return 2;
3163
3164 /* Use register calling convention for local functions when possible. */
3165 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3166 && flag_unit_at_a_time && !profile_flag)
3167 {
3168 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3169 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3170 if (i && i->local)
3171 {
3172 int local_regparm, globals = 0, regno;
3173 struct function *f;
3174
3175 /* Make sure no regparm register is taken by a
3176 global register variable. */
3177 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3178 if (global_regs[local_regparm])
3179 break;
3180
3181 /* We can't use regparm(3) for nested functions as these use the
3182 static chain pointer in the third argument. */
3183 if (local_regparm == 3
3184 && (decl_function_context (decl)
3185 || ix86_force_align_arg_pointer)
3186 && !DECL_NO_STATIC_CHAIN (decl))
3187 local_regparm = 2;
3188
3189 /* If the function realigns its stack pointer, the prologue will
3190 clobber %ecx. If we've already generated code for the callee,
3191 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3192 scanning the attributes for the self-realigning property. */
3193 f = DECL_STRUCT_FUNCTION (decl);
3194 if (local_regparm == 3
3195 && (f ? !!f->machine->force_align_arg_pointer
3196 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3197 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3198 local_regparm = 2;
3199
3200 /* Each global register variable increases register pressure, so the
3201 more global register variables there are, the less the regparm
3202 optimization can be used, unless requested explicitly by the user. */
3203 for (regno = 0; regno < 6; regno++)
3204 if (global_regs[regno])
3205 globals++;
3206 local_regparm
3207 = globals < local_regparm ? local_regparm - globals : 0;
3208
3209 if (local_regparm > regparm)
3210 regparm = local_regparm;
3211 }
3212 }
3213
3214 return regparm;
3215 }
3216
3217 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3218 DFmode (2) arguments in SSE registers for a function with the
3219 indicated TYPE and DECL. DECL may be NULL when calling function
3220 indirectly or considering a libcall. Otherwise return 0. */
3221
3222 static int
3223 ix86_function_sseregparm (const_tree type, const_tree decl)
3224 {
3225 gcc_assert (!TARGET_64BIT);
3226
3227 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3228 by the sseregparm attribute. */
3229 if (TARGET_SSEREGPARM
3230 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3231 {
3232 if (!TARGET_SSE)
3233 {
3234 if (decl)
3235 error ("Calling %qD with attribute sseregparm without "
3236 "SSE/SSE2 enabled", decl);
3237 else
3238 error ("Calling %qT with attribute sseregparm without "
3239 "SSE/SSE2 enabled", type);
3240 return 0;
3241 }
3242
3243 return 2;
3244 }
3245
3246 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3247 (and DFmode for SSE2) arguments in SSE registers. */
3248 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3249 {
3250 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3251 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3252 if (i && i->local)
3253 return TARGET_SSE2 ? 2 : 1;
3254 }
3255
3256 return 0;
3257 }
3258
3259 /* Return true if EAX is live at the start of the function. Used by
3260 ix86_expand_prologue to determine if we need special help before
3261 calling allocate_stack_worker. */
3262
3263 static bool
3264 ix86_eax_live_at_start_p (void)
3265 {
3266 /* Cheat. Don't bother working forward from ix86_function_regparm
3267 to the function type to whether an actual argument is located in
3268 eax. Instead just look at cfg info, which is still close enough
3269 to correct at this point. This gives false positives for broken
3270 functions that might use uninitialized data that happens to be
3271 allocated in eax, but who cares? */
3272 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3273 }
3274
3275 /* Value is the number of bytes of arguments automatically
3276 popped when returning from a subroutine call.
3277 FUNDECL is the declaration node of the function (as a tree),
3278 FUNTYPE is the data type of the function (as a tree),
3279 or for a library call it is an identifier node for the subroutine name.
3280 SIZE is the number of bytes of arguments passed on the stack.
3281
3282 On the 80386, the RTD insn may be used to pop them if the number
3283 of args is fixed, but if the number is variable then the caller
3284 must pop them all. RTD can't be used for library calls now
3285 because the library is compiled with the Unix compiler.
3286 Use of RTD is a selectable option, since it is incompatible with
3287 standard Unix calling sequences. If the option is not selected,
3288 the caller must always pop the args.
3289
3290 The attribute stdcall is equivalent to RTD on a per module basis. */
3291
3292 int
3293 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3294 {
3295 int rtd;
3296
3297 /* None of the 64-bit ABIs pop arguments. */
3298 if (TARGET_64BIT)
3299 return 0;
3300
3301 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3302
3303 /* Cdecl functions override -mrtd, and never pop the stack. */
3304 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3305 {
3306 /* Stdcall and fastcall functions will pop the stack if not
3307 variable args. */
3308 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3309 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3310 rtd = 1;
3311
3312 if (rtd && ! stdarg_p (funtype))
3313 return size;
3314 }
3315
3316 /* Lose any fake structure return argument if it is passed on the stack. */
3317 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3318 && !KEEP_AGGREGATE_RETURN_POINTER)
3319 {
3320 int nregs = ix86_function_regparm (funtype, fundecl);
3321 if (nregs == 0)
3322 return GET_MODE_SIZE (Pmode);
3323 }
3324
3325 return 0;
3326 }
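
/* Sketch (hypothetical declarations): for the fixed-argument stdcall
   function below ix86_return_pops_args returns 8, i.e. the callee pops both
   word-sized arguments with "ret $8"; the stdarg variant returns 0 and the
   caller pops instead.  */
#if 0
int __attribute__ ((stdcall)) sum2 (int a, int b);
int __attribute__ ((stdcall)) sumn (int n, ...);
#endif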
3327 \f
3328 /* Argument support functions. */
3329
3330 /* Return true when register may be used to pass function parameters. */
3331 bool
3332 ix86_function_arg_regno_p (int regno)
3333 {
3334 int i;
3335 const int *parm_regs;
3336
3337 if (!TARGET_64BIT)
3338 {
3339 if (TARGET_MACHO)
3340 return (regno < REGPARM_MAX
3341 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3342 else
3343 return (regno < REGPARM_MAX
3344 || (TARGET_MMX && MMX_REGNO_P (regno)
3345 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3346 || (TARGET_SSE && SSE_REGNO_P (regno)
3347 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3348 }
3349
3350 if (TARGET_MACHO)
3351 {
3352 if (SSE_REGNO_P (regno) && TARGET_SSE)
3353 return true;
3354 }
3355 else
3356 {
3357 if (TARGET_SSE && SSE_REGNO_P (regno)
3358 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3359 return true;
3360 }
3361
3362 /* RAX is used as hidden argument to va_arg functions. */
3363 if (!TARGET_64BIT_MS_ABI && regno == 0)
3364 return true;
3365
3366 if (TARGET_64BIT_MS_ABI)
3367 parm_regs = x86_64_ms_abi_int_parameter_registers;
3368 else
3369 parm_regs = x86_64_int_parameter_registers;
3370 for (i = 0; i < REGPARM_MAX; i++)
3371 if (regno == parm_regs[i])
3372 return true;
3373 return false;
3374 }
3375
3376 /* Return true if we do not know how to pass TYPE solely in registers. */
3377
3378 static bool
3379 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3380 {
3381 if (must_pass_in_stack_var_size_or_pad (mode, type))
3382 return true;
3383
3384 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3385 The layout_type routine is crafty and tries to trick us into passing
3386 currently unsupported vector types on the stack by using TImode. */
3387 return (!TARGET_64BIT && mode == TImode
3388 && type && TREE_CODE (type) != VECTOR_TYPE);
3389 }
3390
3391 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3392 for a call to a function whose data type is FNTYPE.
3393 For a library call, FNTYPE is 0. */
3394
3395 void
3396 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3397 tree fntype, /* tree ptr for function decl */
3398 rtx libname, /* SYMBOL_REF of library name or 0 */
3399 tree fndecl)
3400 {
3401 memset (cum, 0, sizeof (*cum));
3402
3403 /* Set up the number of registers to use for passing arguments. */
3404 cum->nregs = ix86_regparm;
3405 if (TARGET_SSE)
3406 cum->sse_nregs = SSE_REGPARM_MAX;
3407 if (TARGET_MMX)
3408 cum->mmx_nregs = MMX_REGPARM_MAX;
3409 cum->warn_sse = true;
3410 cum->warn_mmx = true;
3411 cum->maybe_vaarg = (fntype
3412 ? (!prototype_p (fntype) || stdarg_p (fntype))
3413 : !libname);
3414
3415 if (!TARGET_64BIT)
3416 {
3417 /* If there are variable arguments, then we won't pass anything
3418 in registers in 32-bit mode. */
3419 if (cum->maybe_vaarg)
3420 {
3421 cum->nregs = 0;
3422 cum->sse_nregs = 0;
3423 cum->mmx_nregs = 0;
3424 cum->warn_sse = 0;
3425 cum->warn_mmx = 0;
3426 return;
3427 }
3428
3429 /* Use ecx and edx registers if function has fastcall attribute,
3430 else look for regparm information. */
3431 if (fntype)
3432 {
3433 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3434 {
3435 cum->nregs = 2;
3436 cum->fastcall = 1;
3437 }
3438 else
3439 cum->nregs = ix86_function_regparm (fntype, fndecl);
3440 }
3441
3442 /* Set up the number of SSE registers used for passing SFmode
3443 and DFmode arguments. Warn for mismatching ABI. */
3444 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3445 }
3446 }
3447
3448 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3449 But in the case of vector types, it is some vector mode.
3450
3451 When we have only some of our vector isa extensions enabled, then there
3452 are some modes for which vector_mode_supported_p is false. For these
3453 modes, the generic vector support in gcc will choose some non-vector mode
3454 in order to implement the type. By computing the natural mode, we'll
3455 select the proper ABI location for the operand and not depend on whatever
3456 the middle-end decides to do with these vector types. */
3457
3458 static enum machine_mode
3459 type_natural_mode (const_tree type)
3460 {
3461 enum machine_mode mode = TYPE_MODE (type);
3462
3463 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3464 {
3465 HOST_WIDE_INT size = int_size_in_bytes (type);
3466 if ((size == 8 || size == 16)
3467 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3468 && TYPE_VECTOR_SUBPARTS (type) > 1)
3469 {
3470 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3471
3472 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3473 mode = MIN_MODE_VECTOR_FLOAT;
3474 else
3475 mode = MIN_MODE_VECTOR_INT;
3476
3477 /* Get the mode which has this inner mode and number of units. */
3478 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3479 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3480 && GET_MODE_INNER (mode) == innermode)
3481 return mode;
3482
3483 gcc_unreachable ();
3484 }
3485 }
3486
3487 return mode;
3488 }
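
/* Sketch: a generic vector type (hypothetical typedef).  Even when SSE is
   disabled and the middle end would lower the type to a non-vector mode,
   the routine above still reports V4SImode as the natural mode, so the ABI
   location of such arguments stays stable.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));
#endif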
3489
3490 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3491 this may not agree with the mode that the type system has chosen for the
3492 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3493 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3494
3495 static rtx
3496 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3497 unsigned int regno)
3498 {
3499 rtx tmp;
3500
3501 if (orig_mode != BLKmode)
3502 tmp = gen_rtx_REG (orig_mode, regno);
3503 else
3504 {
3505 tmp = gen_rtx_REG (mode, regno);
3506 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3507 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3508 }
3509
3510 return tmp;
3511 }
3512
3513 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3514 The goal of this code is to classify each 8-byte chunk of the incoming
3515 argument by register class and assign registers accordingly. */
3516
3517 /* Return the union class of CLASS1 and CLASS2.
3518 See the x86-64 PS ABI for details. */
3519
3520 static enum x86_64_reg_class
3521 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3522 {
3523 /* Rule #1: If both classes are equal, this is the resulting class. */
3524 if (class1 == class2)
3525 return class1;
3526
3527 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3528 the other class. */
3529 if (class1 == X86_64_NO_CLASS)
3530 return class2;
3531 if (class2 == X86_64_NO_CLASS)
3532 return class1;
3533
3534 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3535 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3536 return X86_64_MEMORY_CLASS;
3537
3538 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3539 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3540 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3541 return X86_64_INTEGERSI_CLASS;
3542 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3543 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3544 return X86_64_INTEGER_CLASS;
3545
3546 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3547 MEMORY is used. */
3548 if (class1 == X86_64_X87_CLASS
3549 || class1 == X86_64_X87UP_CLASS
3550 || class1 == X86_64_COMPLEX_X87_CLASS
3551 || class2 == X86_64_X87_CLASS
3552 || class2 == X86_64_X87UP_CLASS
3553 || class2 == X86_64_COMPLEX_X87_CLASS)
3554 return X86_64_MEMORY_CLASS;
3555
3556 /* Rule #6: Otherwise class SSE is used. */
3557 return X86_64_SSE_CLASS;
3558 }
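
/* Worked example (sketch, hypothetical type): classifying the 16-byte
   struct below with the rules above gives an integer class for the first
   eightbyte (the two ints) and an SSE class for the second (the double),
   so it is passed in one general register and one SSE register.  Adding a
   long double member would introduce X87/X87UP and rule #5 would force the
   whole struct into memory.  */
#if 0
struct mix { int a; int b; double d; };
#endif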
3559
3560 /* Classify the argument of type TYPE and mode MODE.
3561 CLASSES will be filled by the register class used to pass each word
3562 of the operand. The number of words is returned. In case the parameter
3563 should be passed in memory, 0 is returned. As a special case for zero
3564 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3565
3566 BIT_OFFSET is used internally for handling records and specifies the
3567 offset in bits modulo 256 to avoid overflow cases.
3568
3569 See the x86-64 PS ABI for details.
3570 */
3571
3572 static int
3573 classify_argument (enum machine_mode mode, const_tree type,
3574 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3575 {
3576 HOST_WIDE_INT bytes =
3577 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3578 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3579
3580 /* Variable sized entities are always passed/returned in memory. */
3581 if (bytes < 0)
3582 return 0;
3583
3584 if (mode != VOIDmode
3585 && targetm.calls.must_pass_in_stack (mode, type))
3586 return 0;
3587
3588 if (type && AGGREGATE_TYPE_P (type))
3589 {
3590 int i;
3591 tree field;
3592 enum x86_64_reg_class subclasses[MAX_CLASSES];
3593
3594 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3595 if (bytes > 16)
3596 return 0;
3597
3598 for (i = 0; i < words; i++)
3599 classes[i] = X86_64_NO_CLASS;
3600
3601 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3602 signal the memory class, so handle it as a special case. */
3603 if (!words)
3604 {
3605 classes[0] = X86_64_NO_CLASS;
3606 return 1;
3607 }
3608
3609 /* Classify each field of record and merge classes. */
3610 switch (TREE_CODE (type))
3611 {
3612 case RECORD_TYPE:
3613 /* And now merge the fields of structure. */
3614 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3615 {
3616 if (TREE_CODE (field) == FIELD_DECL)
3617 {
3618 int num;
3619
3620 if (TREE_TYPE (field) == error_mark_node)
3621 continue;
3622
3623 /* Bitfields are always classified as integer. Handle them
3624 early, since later code would consider them to be
3625 misaligned integers. */
3626 if (DECL_BIT_FIELD (field))
3627 {
3628 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3629 i < ((int_bit_position (field) + (bit_offset % 64))
3630 + tree_low_cst (DECL_SIZE (field), 0)
3631 + 63) / 8 / 8; i++)
3632 classes[i] =
3633 merge_classes (X86_64_INTEGER_CLASS,
3634 classes[i]);
3635 }
3636 else
3637 {
3638 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3639 TREE_TYPE (field), subclasses,
3640 (int_bit_position (field)
3641 + bit_offset) % 256);
3642 if (!num)
3643 return 0;
3644 for (i = 0; i < num; i++)
3645 {
3646 int pos =
3647 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3648 classes[i + pos] =
3649 merge_classes (subclasses[i], classes[i + pos]);
3650 }
3651 }
3652 }
3653 }
3654 break;
3655
3656 case ARRAY_TYPE:
3657 /* Arrays are handled as small records. */
3658 {
3659 int num;
3660 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3661 TREE_TYPE (type), subclasses, bit_offset);
3662 if (!num)
3663 return 0;
3664
3665 /* The partial classes are now full classes. */
3666 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3667 subclasses[0] = X86_64_SSE_CLASS;
3668 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3669 subclasses[0] = X86_64_INTEGER_CLASS;
3670
3671 for (i = 0; i < words; i++)
3672 classes[i] = subclasses[i % num];
3673
3674 break;
3675 }
3676 case UNION_TYPE:
3677 case QUAL_UNION_TYPE:
3678 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3680 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3681 {
3682 if (TREE_CODE (field) == FIELD_DECL)
3683 {
3684 int num;
3685
3686 if (TREE_TYPE (field) == error_mark_node)
3687 continue;
3688
3689 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3690 TREE_TYPE (field), subclasses,
3691 bit_offset);
3692 if (!num)
3693 return 0;
3694 for (i = 0; i < num; i++)
3695 classes[i] = merge_classes (subclasses[i], classes[i]);
3696 }
3697 }
3698 break;
3699
3700 default:
3701 gcc_unreachable ();
3702 }
3703
3704 /* Final merger cleanup. */
3705 for (i = 0; i < words; i++)
3706 {
3707 /* If one class is MEMORY, everything should be passed in
3708 memory. */
3709 if (classes[i] == X86_64_MEMORY_CLASS)
3710 return 0;
3711
3712 /* The X86_64_SSEUP_CLASS should always be preceded by
3713 X86_64_SSE_CLASS. */
3714 if (classes[i] == X86_64_SSEUP_CLASS
3715 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3716 classes[i] = X86_64_SSE_CLASS;
3717
3718 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3719 if (classes[i] == X86_64_X87UP_CLASS
3720 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3721 classes[i] = X86_64_SSE_CLASS;
3722 }
3723 return words;
3724 }
3725
3726 /* Compute the alignment needed. We align all types to their natural
3727 boundaries, with the exception of XFmode which is aligned to 128 bits. */
3728 if (mode != VOIDmode && mode != BLKmode)
3729 {
3730 int mode_alignment = GET_MODE_BITSIZE (mode);
3731
3732 if (mode == XFmode)
3733 mode_alignment = 128;
3734 else if (mode == XCmode)
3735 mode_alignment = 256;
3736 if (COMPLEX_MODE_P (mode))
3737 mode_alignment /= 2;
3738 /* Misaligned fields are always returned in memory. */
3739 if (bit_offset % mode_alignment)
3740 return 0;
3741 }
3742
3743 /* for V1xx modes, just use the base mode */
3744 if (VECTOR_MODE_P (mode)
3745 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3746 mode = GET_MODE_INNER (mode);
3747
3748 /* Classification of atomic types. */
3749 switch (mode)
3750 {
3751 case SDmode:
3752 case DDmode:
3753 classes[0] = X86_64_SSE_CLASS;
3754 return 1;
3755 case TDmode:
3756 classes[0] = X86_64_SSE_CLASS;
3757 classes[1] = X86_64_SSEUP_CLASS;
3758 return 2;
3759 case DImode:
3760 case SImode:
3761 case HImode:
3762 case QImode:
3763 case CSImode:
3764 case CHImode:
3765 case CQImode:
3766 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3767 classes[0] = X86_64_INTEGERSI_CLASS;
3768 else
3769 classes[0] = X86_64_INTEGER_CLASS;
3770 return 1;
3771 case CDImode:
3772 case TImode:
3773 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3774 return 2;
3775 case CTImode:
3776 return 0;
3777 case SFmode:
3778 if (!(bit_offset % 64))
3779 classes[0] = X86_64_SSESF_CLASS;
3780 else
3781 classes[0] = X86_64_SSE_CLASS;
3782 return 1;
3783 case DFmode:
3784 classes[0] = X86_64_SSEDF_CLASS;
3785 return 1;
3786 case XFmode:
3787 classes[0] = X86_64_X87_CLASS;
3788 classes[1] = X86_64_X87UP_CLASS;
3789 return 2;
3790 case TFmode:
3791 classes[0] = X86_64_SSE_CLASS;
3792 classes[1] = X86_64_SSEUP_CLASS;
3793 return 2;
3794 case SCmode:
3795 classes[0] = X86_64_SSE_CLASS;
3796 return 1;
3797 case DCmode:
3798 classes[0] = X86_64_SSEDF_CLASS;
3799 classes[1] = X86_64_SSEDF_CLASS;
3800 return 2;
3801 case XCmode:
3802 classes[0] = X86_64_COMPLEX_X87_CLASS;
3803 return 1;
3804 case TCmode:
3805 /* This mode is larger than 16 bytes. */
3806 return 0;
3807 case V4SFmode:
3808 case V4SImode:
3809 case V16QImode:
3810 case V8HImode:
3811 case V2DFmode:
3812 case V2DImode:
3813 classes[0] = X86_64_SSE_CLASS;
3814 classes[1] = X86_64_SSEUP_CLASS;
3815 return 2;
3816 case V2SFmode:
3817 case V2SImode:
3818 case V4HImode:
3819 case V8QImode:
3820 classes[0] = X86_64_SSE_CLASS;
3821 return 1;
3822 case BLKmode:
3823 case VOIDmode:
3824 return 0;
3825 default:
3826 gcc_assert (VECTOR_MODE_P (mode));
3827
3828 if (bytes > 16)
3829 return 0;
3830
3831 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3832
3833 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3834 classes[0] = X86_64_INTEGERSI_CLASS;
3835 else
3836 classes[0] = X86_64_INTEGER_CLASS;
3837 classes[1] = X86_64_INTEGER_CLASS;
3838 return 1 + (bytes > 8);
3839 }
3840 }
3841
3842 /* Examine the argument and return the number of registers required in each
3843 class. Return 0 iff the parameter should be passed in memory. */
3844 static int
3845 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3846 int *int_nregs, int *sse_nregs)
3847 {
3848 enum x86_64_reg_class regclass[MAX_CLASSES];
3849 int n = classify_argument (mode, type, regclass, 0);
3850
3851 *int_nregs = 0;
3852 *sse_nregs = 0;
3853 if (!n)
3854 return 0;
3855 for (n--; n >= 0; n--)
3856 switch (regclass[n])
3857 {
3858 case X86_64_INTEGER_CLASS:
3859 case X86_64_INTEGERSI_CLASS:
3860 (*int_nregs)++;
3861 break;
3862 case X86_64_SSE_CLASS:
3863 case X86_64_SSESF_CLASS:
3864 case X86_64_SSEDF_CLASS:
3865 (*sse_nregs)++;
3866 break;
3867 case X86_64_NO_CLASS:
3868 case X86_64_SSEUP_CLASS:
3869 break;
3870 case X86_64_X87_CLASS:
3871 case X86_64_X87UP_CLASS:
3872 if (!in_return)
3873 return 0;
3874 break;
3875 case X86_64_COMPLEX_X87_CLASS:
3876 return in_return ? 2 : 0;
3877 case X86_64_MEMORY_CLASS:
3878 gcc_unreachable ();
3879 }
3880 return 1;
3881 }
3882
3883 /* Construct a container for the argument used by the GCC interface. See
3884 FUNCTION_ARG for the detailed description. */
3885
3886 static rtx
3887 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3888 const_tree type, int in_return, int nintregs, int nsseregs,
3889 const int *intreg, int sse_regno)
3890 {
3891 /* The following variables hold the static issued_error state. */
3892 static bool issued_sse_arg_error;
3893 static bool issued_sse_ret_error;
3894 static bool issued_x87_ret_error;
3895
3896 enum machine_mode tmpmode;
3897 int bytes =
3898 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3899 enum x86_64_reg_class regclass[MAX_CLASSES];
3900 int n;
3901 int i;
3902 int nexps = 0;
3903 int needed_sseregs, needed_intregs;
3904 rtx exp[MAX_CLASSES];
3905 rtx ret;
3906
3907 n = classify_argument (mode, type, regclass, 0);
3908 if (!n)
3909 return NULL;
3910 if (!examine_argument (mode, type, in_return, &needed_intregs,
3911 &needed_sseregs))
3912 return NULL;
3913 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3914 return NULL;
3915
3916 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3917 some less clueful developer tries to use floating-point anyway. */
3918 if (needed_sseregs && !TARGET_SSE)
3919 {
3920 if (in_return)
3921 {
3922 if (!issued_sse_ret_error)
3923 {
3924 error ("SSE register return with SSE disabled");
3925 issued_sse_ret_error = true;
3926 }
3927 }
3928 else if (!issued_sse_arg_error)
3929 {
3930 error ("SSE register argument with SSE disabled");
3931 issued_sse_arg_error = true;
3932 }
3933 return NULL;
3934 }
3935
3936 /* Likewise, error if the ABI requires us to return values in the
3937 x87 registers and the user specified -mno-80387. */
3938 if (!TARGET_80387 && in_return)
3939 for (i = 0; i < n; i++)
3940 if (regclass[i] == X86_64_X87_CLASS
3941 || regclass[i] == X86_64_X87UP_CLASS
3942 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3943 {
3944 if (!issued_x87_ret_error)
3945 {
3946 error ("x87 register return with x87 disabled");
3947 issued_x87_ret_error = true;
3948 }
3949 return NULL;
3950 }
3951
3952 /* First construct simple cases. Avoid SCmode, since we want to use a
3953 single register to pass this type. */
3954 if (n == 1 && mode != SCmode)
3955 switch (regclass[0])
3956 {
3957 case X86_64_INTEGER_CLASS:
3958 case X86_64_INTEGERSI_CLASS:
3959 return gen_rtx_REG (mode, intreg[0]);
3960 case X86_64_SSE_CLASS:
3961 case X86_64_SSESF_CLASS:
3962 case X86_64_SSEDF_CLASS:
3963 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3964 case X86_64_X87_CLASS:
3965 case X86_64_COMPLEX_X87_CLASS:
3966 return gen_rtx_REG (mode, FIRST_STACK_REG);
3967 case X86_64_NO_CLASS:
3968 /* Zero sized array, struct or class. */
3969 return NULL;
3970 default:
3971 gcc_unreachable ();
3972 }
3973 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3974 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3975 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3976
3977 if (n == 2
3978 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3979 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3980 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3981 && regclass[1] == X86_64_INTEGER_CLASS
3982 && (mode == CDImode || mode == TImode || mode == TFmode)
3983 && intreg[0] + 1 == intreg[1])
3984 return gen_rtx_REG (mode, intreg[0]);
3985
3986 /* Otherwise figure out the entries of the PARALLEL. */
3987 for (i = 0; i < n; i++)
3988 {
3989 switch (regclass[i])
3990 {
3991 case X86_64_NO_CLASS:
3992 break;
3993 case X86_64_INTEGER_CLASS:
3994 case X86_64_INTEGERSI_CLASS:
3995 /* Merge TImodes on aligned occasions here too. */
3996 if (i * 8 + 8 > bytes)
3997 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3998 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3999 tmpmode = SImode;
4000 else
4001 tmpmode = DImode;
4002 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
4003 if (tmpmode == BLKmode)
4004 tmpmode = DImode;
4005 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4006 gen_rtx_REG (tmpmode, *intreg),
4007 GEN_INT (i*8));
4008 intreg++;
4009 break;
4010 case X86_64_SSESF_CLASS:
4011 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4012 gen_rtx_REG (SFmode,
4013 SSE_REGNO (sse_regno)),
4014 GEN_INT (i*8));
4015 sse_regno++;
4016 break;
4017 case X86_64_SSEDF_CLASS:
4018 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4019 gen_rtx_REG (DFmode,
4020 SSE_REGNO (sse_regno)),
4021 GEN_INT (i*8));
4022 sse_regno++;
4023 break;
4024 case X86_64_SSE_CLASS:
4025 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4026 tmpmode = TImode;
4027 else
4028 tmpmode = DImode;
4029 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4030 gen_rtx_REG (tmpmode,
4031 SSE_REGNO (sse_regno)),
4032 GEN_INT (i*8));
4033 if (tmpmode == TImode)
4034 i++;
4035 sse_regno++;
4036 break;
4037 default:
4038 gcc_unreachable ();
4039 }
4040 }
4041
4042 /* Empty aligned struct, union or class. */
4043 if (nexps == 0)
4044 return NULL;
4045
4046 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4047 for (i = 0; i < nexps; i++)
4048 XVECEXP (ret, 0, i) = exp [i];
4049 return ret;
4050 }
4051
4052 /* Update the data in CUM to advance over an argument of mode MODE
4053 and data type TYPE. (TYPE is null for libcalls where that information
4054 may not be available.) */
4055
4056 static void
4057 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4058 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4059 {
4060 switch (mode)
4061 {
4062 default:
4063 break;
4064
4065 case BLKmode:
4066 if (bytes < 0)
4067 break;
4068 /* FALLTHRU */
4069
4070 case DImode:
4071 case SImode:
4072 case HImode:
4073 case QImode:
4074 cum->words += words;
4075 cum->nregs -= words;
4076 cum->regno += words;
4077
4078 if (cum->nregs <= 0)
4079 {
4080 cum->nregs = 0;
4081 cum->regno = 0;
4082 }
4083 break;
4084
4085 case DFmode:
4086 if (cum->float_in_sse < 2)
4087 break;
4088 case SFmode:
4089 if (cum->float_in_sse < 1)
4090 break;
4091 /* FALLTHRU */
4092
4093 case TImode:
4094 case V16QImode:
4095 case V8HImode:
4096 case V4SImode:
4097 case V2DImode:
4098 case V4SFmode:
4099 case V2DFmode:
4100 if (!type || !AGGREGATE_TYPE_P (type))
4101 {
4102 cum->sse_words += words;
4103 cum->sse_nregs -= 1;
4104 cum->sse_regno += 1;
4105 if (cum->sse_nregs <= 0)
4106 {
4107 cum->sse_nregs = 0;
4108 cum->sse_regno = 0;
4109 }
4110 }
4111 break;
4112
4113 case V8QImode:
4114 case V4HImode:
4115 case V2SImode:
4116 case V2SFmode:
4117 if (!type || !AGGREGATE_TYPE_P (type))
4118 {
4119 cum->mmx_words += words;
4120 cum->mmx_nregs -= 1;
4121 cum->mmx_regno += 1;
4122 if (cum->mmx_nregs <= 0)
4123 {
4124 cum->mmx_nregs = 0;
4125 cum->mmx_regno = 0;
4126 }
4127 }
4128 break;
4129 }
4130 }
4131
4132 static void
4133 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4134 tree type, HOST_WIDE_INT words)
4135 {
4136 int int_nregs, sse_nregs;
4137
4138 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4139 cum->words += words;
4140 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4141 {
4142 cum->nregs -= int_nregs;
4143 cum->sse_nregs -= sse_nregs;
4144 cum->regno += int_nregs;
4145 cum->sse_regno += sse_nregs;
4146 }
4147 else
4148 cum->words += words;
4149 }
4150
4151 static void
4152 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4153 HOST_WIDE_INT words)
4154 {
4155 /* Otherwise, this should be passed indirectly. */
4156 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4157
4158 cum->words += words;
4159 if (cum->nregs > 0)
4160 {
4161 cum->nregs -= 1;
4162 cum->regno += 1;
4163 }
4164 }
4165
4166 void
4167 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4168 tree type, int named ATTRIBUTE_UNUSED)
4169 {
4170 HOST_WIDE_INT bytes, words;
4171
4172 if (mode == BLKmode)
4173 bytes = int_size_in_bytes (type);
4174 else
4175 bytes = GET_MODE_SIZE (mode);
4176 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4177
4178 if (type)
4179 mode = type_natural_mode (type);
4180
4181 if (TARGET_64BIT_MS_ABI)
4182 function_arg_advance_ms_64 (cum, bytes, words);
4183 else if (TARGET_64BIT)
4184 function_arg_advance_64 (cum, mode, type, words);
4185 else
4186 function_arg_advance_32 (cum, mode, type, bytes, words);
4187 }
4188
4189 /* Define where to put the arguments to a function.
4190 Value is zero to push the argument on the stack,
4191 or a hard register in which to store the argument.
4192
4193 MODE is the argument's machine mode.
4194 TYPE is the data type of the argument (as a tree).
4195 This is null for libcalls where that information may
4196 not be available.
4197 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4198 the preceding args and about the function being called.
4199 NAMED is nonzero if this argument is a named parameter
4200 (otherwise it is an extra parameter matching an ellipsis). */
4201
4202 static rtx
4203 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4204 enum machine_mode orig_mode, tree type,
4205 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4206 {
4207 static bool warnedsse, warnedmmx;
4208
4209 /* Avoid the AL settings for the Unix64 ABI. */
4210 if (mode == VOIDmode)
4211 return constm1_rtx;
4212
4213 switch (mode)
4214 {
4215 default:
4216 break;
4217
4218 case BLKmode:
4219 if (bytes < 0)
4220 break;
4221 /* FALLTHRU */
4222 case DImode:
4223 case SImode:
4224 case HImode:
4225 case QImode:
4226 if (words <= cum->nregs)
4227 {
4228 int regno = cum->regno;
4229
4230 /* Fastcall allocates the first two DWORD (SImode) or
4231 smaller arguments to ECX and EDX. */
4232 if (cum->fastcall)
4233 {
4234 if (mode == BLKmode || mode == DImode)
4235 break;
4236
4237 /* ECX, not EAX, is the first allocated register. */
4238 if (regno == 0)
4239 regno = 2;
4240 }
4241 return gen_rtx_REG (mode, regno);
4242 }
4243 break;
4244
4245 case DFmode:
4246 if (cum->float_in_sse < 2)
4247 break;
4248 case SFmode:
4249 if (cum->float_in_sse < 1)
4250 break;
4251 /* FALLTHRU */
4252 case TImode:
4253 case V16QImode:
4254 case V8HImode:
4255 case V4SImode:
4256 case V2DImode:
4257 case V4SFmode:
4258 case V2DFmode:
4259 if (!type || !AGGREGATE_TYPE_P (type))
4260 {
4261 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4262 {
4263 warnedsse = true;
4264 warning (0, "SSE vector argument without SSE enabled "
4265 "changes the ABI");
4266 }
4267 if (cum->sse_nregs)
4268 return gen_reg_or_parallel (mode, orig_mode,
4269 cum->sse_regno + FIRST_SSE_REG);
4270 }
4271 break;
4272
4273 case V8QImode:
4274 case V4HImode:
4275 case V2SImode:
4276 case V2SFmode:
4277 if (!type || !AGGREGATE_TYPE_P (type))
4278 {
4279 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4280 {
4281 warnedmmx = true;
4282 warning (0, "MMX vector argument without MMX enabled "
4283 "changes the ABI");
4284 }
4285 if (cum->mmx_nregs)
4286 return gen_reg_or_parallel (mode, orig_mode,
4287 cum->mmx_regno + FIRST_MMX_REG);
4288 }
4289 break;
4290 }
4291
4292 return NULL_RTX;
4293 }
4294
4295 static rtx
4296 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4297 enum machine_mode orig_mode, tree type)
4298 {
4299 /* Handle a hidden AL argument containing number of registers
4300 for varargs x86-64 functions. */
4301 if (mode == VOIDmode)
4302 return GEN_INT (cum->maybe_vaarg
4303 ? (cum->sse_nregs < 0
4304 ? SSE_REGPARM_MAX
4305 : cum->sse_regno)
4306 : -1);
4307
4308 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4309 cum->sse_nregs,
4310 &x86_64_int_parameter_registers [cum->regno],
4311 cum->sse_regno);
4312 }
4313
4314 static rtx
4315 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4316 enum machine_mode orig_mode, int named)
4317 {
4318 unsigned int regno;
4319
4320 /* Avoid the AL settings for the Unix64 ABI. */
4321 if (mode == VOIDmode)
4322 return constm1_rtx;
4323
4324 /* If we've run out of registers, it goes on the stack. */
4325 if (cum->nregs == 0)
4326 return NULL_RTX;
4327
4328 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4329
4330 /* Only floating point modes are passed in anything but integer regs. */
4331 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4332 {
4333 if (named)
4334 regno = cum->regno + FIRST_SSE_REG;
4335 else
4336 {
4337 rtx t1, t2;
4338
4339 /* Unnamed floating parameters are passed in both the
4340 SSE and integer registers. */
4341 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4342 t2 = gen_rtx_REG (mode, regno);
4343 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4344 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4345 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4346 }
4347 }
4348
4349 return gen_reg_or_parallel (mode, orig_mode, regno);
4350 }
4351
4352 rtx
4353 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4354 tree type, int named)
4355 {
4356 enum machine_mode mode = omode;
4357 HOST_WIDE_INT bytes, words;
4358
4359 if (mode == BLKmode)
4360 bytes = int_size_in_bytes (type);
4361 else
4362 bytes = GET_MODE_SIZE (mode);
4363 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4364
4365 /* To simplify the code below, represent vector types with a vector mode
4366 even if MMX/SSE are not active. */
4367 if (type && TREE_CODE (type) == VECTOR_TYPE)
4368 mode = type_natural_mode (type);
4369
4370 if (TARGET_64BIT_MS_ABI)
4371 return function_arg_ms_64 (cum, mode, omode, named);
4372 else if (TARGET_64BIT)
4373 return function_arg_64 (cum, mode, omode, type);
4374 else
4375 return function_arg_32 (cum, mode, omode, type, bytes, words);
4376 }
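
/* Sketch (hypothetical prototype): on x86-64 SysV the dispatch above ends
   up in function_arg_64, which places n in %edi, x in %xmm0 and p in %rsi,
   while function_arg_advance updates the cumulative register counts after
   each argument.  */
#if 0
extern void report (int n, double x, const char *p);
#endif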
4377
4378 /* A C expression that indicates when an argument must be passed by
4379 reference. If nonzero for an argument, a copy of that argument is
4380 made in memory and a pointer to the argument is passed instead of
4381 the argument itself. The pointer is passed in whatever way is
4382 appropriate for passing a pointer to that type. */
4383
4384 static bool
4385 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4386 enum machine_mode mode ATTRIBUTE_UNUSED,
4387 const_tree type, bool named ATTRIBUTE_UNUSED)
4388 {
4389 if (TARGET_64BIT_MS_ABI)
4390 {
4391 if (type)
4392 {
4393 /* Arrays are passed by reference. */
4394 if (TREE_CODE (type) == ARRAY_TYPE)
4395 return true;
4396
4397 if (AGGREGATE_TYPE_P (type))
4398 {
4399 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4400 are passed by reference. */
4401 int el2 = exact_log2 (int_size_in_bytes (type));
4402 return !(el2 >= 0 && el2 <= 3);
4403 }
4404 }
4405
4406 /* __m128 is passed by reference. */
4407 /* ??? How to handle complex? For now treat them as structs,
4408 and pass them by reference if they're too large. */
4409 if (GET_MODE_SIZE (mode) > 8)
4410 return true;
4411 }
4412 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4413 return 1;
4414
4415 return 0;
4416 }
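
/* Sketch (hypothetical types, x86-64 MS ABI): the first struct is 8 bytes,
   a power of two no larger than 8, so it is passed by value; the second is
   12 bytes and is passed by reference, per the exact_log2 check above.  */
#if 0
struct by_value { int a; int b; };
struct by_ref   { int a; int b; int c; };
#endif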
4417
4418 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4419 ABI. Only called if TARGET_SSE. */
4420 static bool
4421 contains_128bit_aligned_vector_p (tree type)
4422 {
4423 enum machine_mode mode = TYPE_MODE (type);
4424 if (SSE_REG_MODE_P (mode)
4425 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4426 return true;
4427 if (TYPE_ALIGN (type) < 128)
4428 return false;
4429
4430 if (AGGREGATE_TYPE_P (type))
4431 {
4432 /* Walk the aggregates recursively. */
4433 switch (TREE_CODE (type))
4434 {
4435 case RECORD_TYPE:
4436 case UNION_TYPE:
4437 case QUAL_UNION_TYPE:
4438 {
4439 tree field;
4440
4441 /* Walk all the structure fields. */
4442 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4443 {
4444 if (TREE_CODE (field) == FIELD_DECL
4445 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4446 return true;
4447 }
4448 break;
4449 }
4450
4451 case ARRAY_TYPE:
4452 /* Just in case some languages pass arrays by value. */
4453 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4454 return true;
4455 break;
4456
4457 default:
4458 gcc_unreachable ();
4459 }
4460 }
4461 return false;
4462 }
4463
4464 /* Gives the alignment boundary, in bits, of an argument with the
4465 specified mode and type. */
4466
4467 int
4468 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4469 {
4470 int align;
4471 if (type)
4472 align = TYPE_ALIGN (type);
4473 else
4474 align = GET_MODE_ALIGNMENT (mode);
4475 if (align < PARM_BOUNDARY)
4476 align = PARM_BOUNDARY;
4477 if (!TARGET_64BIT)
4478 {
4479 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4480 make an exception for SSE modes since these require 128bit
4481 alignment.
4482
4483 The handling here differs from field_alignment. ICC aligns MMX
4484 arguments to 4 byte boundaries, while structure fields are aligned
4485 to 8 byte boundaries. */
4486 if (!TARGET_SSE)
4487 align = PARM_BOUNDARY;
4488 else if (!type)
4489 {
4490 if (!SSE_REG_MODE_P (mode))
4491 align = PARM_BOUNDARY;
4492 }
4493 else
4494 {
4495 if (!contains_128bit_aligned_vector_p (type))
4496 align = PARM_BOUNDARY;
4497 }
4498 }
4499 if (align > 128)
4500 align = 128;
4501 return align;
4502 }
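
/* Sketch (hypothetical prototype, 32-bit with SSE enabled): the vector
   argument below is aligned to the 128-bit boundary on the stack, while the
   plain int stays at PARM_BOUNDARY.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
extern void blend (int n, v4sf v);
#endif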
4503
4504 /* Return true if N is a possible register number of function value. */
4505
4506 bool
4507 ix86_function_value_regno_p (int regno)
4508 {
4509 switch (regno)
4510 {
4511 case 0:
4512 return true;
4513
4514 case FIRST_FLOAT_REG:
4515 if (TARGET_64BIT_MS_ABI)
4516 return false;
4517 return TARGET_FLOAT_RETURNS_IN_80387;
4518
4519 case FIRST_SSE_REG:
4520 return TARGET_SSE;
4521
4522 case FIRST_MMX_REG:
4523 if (TARGET_MACHO || TARGET_64BIT)
4524 return false;
4525 return TARGET_MMX;
4526 }
4527
4528 return false;
4529 }
4530
4531 /* Define how to find the value returned by a function.
4532 VALTYPE is the data type of the value (as a tree).
4533 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4534 otherwise, FUNC is 0. */
4535
4536 static rtx
4537 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4538 const_tree fntype, const_tree fn)
4539 {
4540 unsigned int regno;
4541
4542 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4543 we normally prevent this case when mmx is not available. However
4544 some ABIs may require the result to be returned like DImode. */
4545 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4546 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4547
4548 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4549 we prevent this case when sse is not available. However some ABIs
4550 may require the result to be returned like integer TImode. */
4551 else if (mode == TImode
4552 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4553 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4554
4555 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4556 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4557 regno = FIRST_FLOAT_REG;
4558 else
4559 /* Most things go in %eax. */
4560 regno = 0;
4561
4562 /* Override FP return register with %xmm0 for local functions when
4563 SSE math is enabled or for functions with sseregparm attribute. */
4564 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4565 {
4566 int sse_level = ix86_function_sseregparm (fntype, fn);
4567 if ((sse_level >= 1 && mode == SFmode)
4568 || (sse_level == 2 && mode == DFmode))
4569 regno = FIRST_SSE_REG;
4570 }
4571
4572 return gen_rtx_REG (orig_mode, regno);
4573 }
4574
4575 static rtx
4576 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4577 const_tree valtype)
4578 {
4579 rtx ret;
4580
4581 /* Handle libcalls, which don't provide a type node. */
4582 if (valtype == NULL)
4583 {
4584 switch (mode)
4585 {
4586 case SFmode:
4587 case SCmode:
4588 case DFmode:
4589 case DCmode:
4590 case TFmode:
4591 case SDmode:
4592 case DDmode:
4593 case TDmode:
4594 return gen_rtx_REG (mode, FIRST_SSE_REG);
4595 case XFmode:
4596 case XCmode:
4597 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4598 case TCmode:
4599 return NULL;
4600 default:
4601 return gen_rtx_REG (mode, 0);
4602 }
4603 }
4604
4605 ret = construct_container (mode, orig_mode, valtype, 1,
4606 REGPARM_MAX, SSE_REGPARM_MAX,
4607 x86_64_int_return_registers, 0);
4608
4609 /* For zero sized structures, construct_container returns NULL, but we
4610 need to keep the rest of the compiler happy by returning a meaningful value. */
4611 if (!ret)
4612 ret = gen_rtx_REG (orig_mode, 0);
4613
4614 return ret;
4615 }
4616
4617 static rtx
4618 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4619 {
4620 unsigned int regno = 0;
4621
4622 if (TARGET_SSE)
4623 {
4624 if (mode == SFmode || mode == DFmode)
4625 regno = FIRST_SSE_REG;
4626 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4627 regno = FIRST_SSE_REG;
4628 }
4629
4630 return gen_rtx_REG (orig_mode, regno);
4631 }
4632
4633 static rtx
4634 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4635 enum machine_mode orig_mode, enum machine_mode mode)
4636 {
4637 const_tree fn, fntype;
4638
4639 fn = NULL_TREE;
4640 if (fntype_or_decl && DECL_P (fntype_or_decl))
4641 fn = fntype_or_decl;
4642 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4643
4644 if (TARGET_64BIT_MS_ABI)
4645 return function_value_ms_64 (orig_mode, mode);
4646 else if (TARGET_64BIT)
4647 return function_value_64 (orig_mode, mode, valtype);
4648 else
4649 return function_value_32 (orig_mode, mode, fntype, fn);
4650 }
4651
4652 static rtx
4653 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4654 bool outgoing ATTRIBUTE_UNUSED)
4655 {
4656 enum machine_mode mode, orig_mode;
4657
4658 orig_mode = TYPE_MODE (valtype);
4659 mode = type_natural_mode (valtype);
4660 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4661 }
4662
4663 rtx
4664 ix86_libcall_value (enum machine_mode mode)
4665 {
4666 return ix86_function_value_1 (NULL, NULL, mode, mode);
4667 }
4668
4669 /* Return true iff type is returned in memory. */
4670
4671 static int
4672 return_in_memory_32 (const_tree type, enum machine_mode mode)
4673 {
4674 HOST_WIDE_INT size;
4675
4676 if (mode == BLKmode)
4677 return 1;
4678
4679 size = int_size_in_bytes (type);
4680
4681 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4682 return 0;
4683
4684 if (VECTOR_MODE_P (mode) || mode == TImode)
4685 {
4686 /* User-created vectors small enough to fit in EAX. */
4687 if (size < 8)
4688 return 0;
4689
4690 /* MMX/3dNow values are returned in MM0,
4691 except when it doesn't exist. */
4692 if (size == 8)
4693 return (TARGET_MMX ? 0 : 1);
4694
4695 /* SSE values are returned in XMM0, except when it doesn't exist. */
4696 if (size == 16)
4697 return (TARGET_SSE ? 0 : 1);
4698 }
4699
4700 if (mode == XFmode)
4701 return 0;
4702
4703 if (mode == TDmode)
4704 return 1;
4705
4706 if (size > 12)
4707 return 1;
4708 return 0;
4709 }
4710
4711 static int
4712 return_in_memory_64 (const_tree type, enum machine_mode mode)
4713 {
4714 int needed_intregs, needed_sseregs;
4715 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4716 }
4717
4718 static int
4719 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4720 {
4721 HOST_WIDE_INT size = int_size_in_bytes (type);
4722
4723 /* __m128 and friends are returned in xmm0. */
4724 if (size == 16 && VECTOR_MODE_P (mode))
4725 return 0;
4726
4727 /* Otherwise, the size must be exactly in [1248]. */
4728 return (size != 1 && size != 2 && size != 4 && size != 8);
4729 }
4730
4731 int
4732 ix86_return_in_memory (const_tree type)
4733 {
4734 const enum machine_mode mode = type_natural_mode (type);
4735
4736 if (TARGET_64BIT_MS_ABI)
4737 return return_in_memory_ms_64 (type, mode);
4738 else if (TARGET_64BIT)
4739 return return_in_memory_64 (type, mode);
4740 else
4741 return return_in_memory_32 (type, mode);
4742 }
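
/* Sketch (hypothetical declarations, 32-bit): the 16-byte struct below is
   returned in memory via a hidden pointer by the checks above, whereas
   long double (XFmode) is returned in %st(0).  */
#if 0
struct quad { int x, y, z, w; };
extern struct quad get_quad (void);
extern long double get_ld (void);
#endif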
4743
4744 /* Return true iff TYPE is returned in memory. This version is used
4745 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4746 but differs notably in that when MMX is available, 8-byte vectors
4747 are returned in memory, rather than in MMX registers. */
4748
4749 int
4750 ix86_sol10_return_in_memory (const_tree type)
4751 {
4752 int size;
4753 enum machine_mode mode = type_natural_mode (type);
4754
4755 if (TARGET_64BIT)
4756 return return_in_memory_64 (type, mode);
4757
4758 if (mode == BLKmode)
4759 return 1;
4760
4761 size = int_size_in_bytes (type);
4762
4763 if (VECTOR_MODE_P (mode))
4764 {
4765 /* Return in memory only if MMX registers *are* available. This
4766 seems backwards, but it is consistent with the existing
4767 Solaris x86 ABI. */
4768 if (size == 8)
4769 return TARGET_MMX;
4770 if (size == 16)
4771 return !TARGET_SSE;
4772 }
4773 else if (mode == TImode)
4774 return !TARGET_SSE;
4775 else if (mode == XFmode)
4776 return 0;
4777
4778 return size > 12;
4779 }
4780
4781 /* When returning SSE vector types, we have a choice of either
4782 (1) being ABI incompatible with a -march switch, or
4783 (2) generating an error.
4784 Given no good solution, I think the safest thing is one warning.
4785 The user won't be able to use -Werror, but....
4786
4787 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4788 called in response to actually generating a caller or callee that
4789 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4790 via aggregate_value_p for general type probing from tree-ssa. */
4791
4792 static rtx
4793 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4794 {
4795 static bool warnedsse, warnedmmx;
4796
4797 if (!TARGET_64BIT && type)
4798 {
4799 /* Look at the return type of the function, not the function type. */
4800 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4801
4802 if (!TARGET_SSE && !warnedsse)
4803 {
4804 if (mode == TImode
4805 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4806 {
4807 warnedsse = true;
4808 warning (0, "SSE vector return without SSE enabled "
4809 "changes the ABI");
4810 }
4811 }
4812
4813 if (!TARGET_MMX && !warnedmmx)
4814 {
4815 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4816 {
4817 warnedmmx = true;
4818 warning (0, "MMX vector return without MMX enabled "
4819 "changes the ABI");
4820 }
4821 }
4822 }
4823
4824 return NULL;
4825 }
4826
4827 \f
4828 /* Create the va_list data type. */
4829
4830 static tree
4831 ix86_build_builtin_va_list (void)
4832 {
4833 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4834
4835 /* For i386 we use a plain pointer to the argument area. */
4836 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4837 return build_pointer_type (char_type_node);
4838
4839 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4840 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4841
4842 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4843 unsigned_type_node);
4844 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4845 unsigned_type_node);
4846 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4847 ptr_type_node);
4848 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4849 ptr_type_node);
4850
4851 va_list_gpr_counter_field = f_gpr;
4852 va_list_fpr_counter_field = f_fpr;
4853
4854 DECL_FIELD_CONTEXT (f_gpr) = record;
4855 DECL_FIELD_CONTEXT (f_fpr) = record;
4856 DECL_FIELD_CONTEXT (f_ovf) = record;
4857 DECL_FIELD_CONTEXT (f_sav) = record;
4858
4859 TREE_CHAIN (record) = type_decl;
4860 TYPE_NAME (record) = type_decl;
4861 TYPE_FIELDS (record) = f_gpr;
4862 TREE_CHAIN (f_gpr) = f_fpr;
4863 TREE_CHAIN (f_fpr) = f_ovf;
4864 TREE_CHAIN (f_ovf) = f_sav;
4865
4866 layout_type (record);
4867
4868 /* The correct type is an array type of one element. */
4869 return build_array_type (record, build_index_type (size_zero_node));
4870 }
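
/* Sketch: the record built above corresponds to the x86-64 va_list layout
   spelled out below (the 32-bit and MS cases are just a char pointer); the
   typedef name is hypothetical.  */
#if 0
typedef struct {
  unsigned int gp_offset;      /* offset of the next GP arg in reg_save_area */
  unsigned int fp_offset;      /* offset of the next FP arg in reg_save_area */
  void *overflow_arg_area;     /* next stack-passed argument */
  void *reg_save_area;         /* register save area set up by the prologue */
} va_list_sketch[1];           /* array type of one element */
#endif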
4871
4872 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4873
4874 static void
4875 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4876 {
4877 rtx save_area, mem;
4878 rtx label;
4879 rtx label_ref;
4880 rtx tmp_reg;
4881 rtx nsse_reg;
4882 alias_set_type set;
4883 int i;
4884
4885 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4886 return;
4887
4888 /* Indicate that we need to allocate stack space for the varargs save area. */
4889 ix86_save_varrargs_registers = 1;
4890 /* We need 16-byte stack alignment to save SSE registers. If the user
4891 asked for a lower preferred_stack_boundary, let's just hope that they
4892 know what they are doing and won't pass SSE values through varargs.
4893
4894 We may also end up assuming that only 64-bit values are stored in SSE
4895 registers, to let some floating point programs work. */
4896 if (ix86_preferred_stack_boundary >= 128)
4897 cfun->stack_alignment_needed = 128;
4898
4899 save_area = frame_pointer_rtx;
4900 set = get_varargs_alias_set ();
4901
4902 for (i = cum->regno;
4903 i < ix86_regparm
4904 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4905 i++)
4906 {
4907 mem = gen_rtx_MEM (Pmode,
4908 plus_constant (save_area, i * UNITS_PER_WORD));
4909 MEM_NOTRAP_P (mem) = 1;
4910 set_mem_alias_set (mem, set);
4911 emit_move_insn (mem, gen_rtx_REG (Pmode,
4912 x86_64_int_parameter_registers[i]));
4913 }
4914
4915 if (cum->sse_nregs && cfun->va_list_fpr_size)
4916 {
4917 /* Now emit code to save SSE registers. The AX parameter contains the number
4918 of SSE parameter registers used to call this function. We use the
4919 sse_prologue_save insn template, which produces a computed jump across
4920 the SSE saves. We need some preparation work to get this working. */
4921
4922 label = gen_label_rtx ();
4923 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4924
4925 /* Compute the address to jump to:
4926 label - eax*4 + nnamed_sse_arguments*4 */
4927 tmp_reg = gen_reg_rtx (Pmode);
4928 nsse_reg = gen_reg_rtx (Pmode);
4929 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4930 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4931 gen_rtx_MULT (Pmode, nsse_reg,
4932 GEN_INT (4))));
4933 if (cum->sse_regno)
4934 emit_move_insn
4935 (nsse_reg,
4936 gen_rtx_CONST (DImode,
4937 gen_rtx_PLUS (DImode,
4938 label_ref,
4939 GEN_INT (cum->sse_regno * 4))));
4940 else
4941 emit_move_insn (nsse_reg, label_ref);
4942 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4943
4944 /* Compute the address of the memory block we save into. We always use a
4945 pointer pointing 127 bytes after the first byte to store - this is needed
4946 to keep the instruction size limited to 4 bytes. */
4947 tmp_reg = gen_reg_rtx (Pmode);
4948 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4949 plus_constant (save_area,
4950 8 * REGPARM_MAX + 127)));
4951 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4952 MEM_NOTRAP_P (mem) = 1;
4953 set_mem_alias_set (mem, set);
4954 set_mem_align (mem, BITS_PER_WORD);
4955
4956 /* And finally do the dirty job! */
4957 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4958 GEN_INT (cum->sse_regno), label));
4959 }
4960 }
4961
4962 static void
4963 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4964 {
4965 alias_set_type set = get_varargs_alias_set ();
4966 int i;
4967
4968 for (i = cum->regno; i < REGPARM_MAX; i++)
4969 {
4970 rtx reg, mem;
4971
4972 mem = gen_rtx_MEM (Pmode,
4973 plus_constant (virtual_incoming_args_rtx,
4974 i * UNITS_PER_WORD));
4975 MEM_NOTRAP_P (mem) = 1;
4976 set_mem_alias_set (mem, set);
4977
4978 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4979 emit_move_insn (mem, reg);
4980 }
4981 }
4982
4983 static void
4984 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4985 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4986 int no_rtl)
4987 {
4988 CUMULATIVE_ARGS next_cum;
4989 tree fntype;
4990
4991 /* This argument doesn't appear to be used anymore. Which is good,
4992 because the old code here didn't suppress rtl generation. */
4993 gcc_assert (!no_rtl);
4994
4995 if (!TARGET_64BIT)
4996 return;
4997
4998 fntype = TREE_TYPE (current_function_decl);
4999
5000 /* For varargs, we do not want to skip the dummy va_dcl argument.
5001 For stdargs, we do want to skip the last named argument. */
5002 next_cum = *cum;
5003 if (stdarg_p (fntype))
5004 function_arg_advance (&next_cum, mode, type, 1);
5005
5006 if (TARGET_64BIT_MS_ABI)
5007 setup_incoming_varargs_ms_64 (&next_cum);
5008 else
5009 setup_incoming_varargs_64 (&next_cum);
5010 }
5011
5012 /* Implement va_start. */
5013
5014 void
5015 ix86_va_start (tree valist, rtx nextarg)
5016 {
5017 HOST_WIDE_INT words, n_gpr, n_fpr;
5018 tree f_gpr, f_fpr, f_ovf, f_sav;
5019 tree gpr, fpr, ovf, sav, t;
5020 tree type;
5021
5022 /* Only the 64-bit target needs anything special. */
5023 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5024 {
5025 std_expand_builtin_va_start (valist, nextarg);
5026 return;
5027 }
5028
5029 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5030 f_fpr = TREE_CHAIN (f_gpr);
5031 f_ovf = TREE_CHAIN (f_fpr);
5032 f_sav = TREE_CHAIN (f_ovf);
5033
5034 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5035 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5036 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5037 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5038 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5039
5040 /* Count number of gp and fp argument registers used. */
5041 words = current_function_args_info.words;
5042 n_gpr = current_function_args_info.regno;
5043 n_fpr = current_function_args_info.sse_regno;
5044
5045 if (cfun->va_list_gpr_size)
5046 {
5047 type = TREE_TYPE (gpr);
5048 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5049 build_int_cst (type, n_gpr * 8));
5050 TREE_SIDE_EFFECTS (t) = 1;
5051 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5052 }
5053
5054 if (cfun->va_list_fpr_size)
5055 {
5056 type = TREE_TYPE (fpr);
5057 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5058 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5059 TREE_SIDE_EFFECTS (t) = 1;
5060 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5061 }
5062
5063 /* Find the overflow area. */
5064 type = TREE_TYPE (ovf);
5065 t = make_tree (type, virtual_incoming_args_rtx);
5066 if (words != 0)
5067 t = build2 (POINTER_PLUS_EXPR, type, t,
5068 size_int (words * UNITS_PER_WORD));
5069 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5070 TREE_SIDE_EFFECTS (t) = 1;
5071 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5072
5073 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5074 {
5075 /* Find the register save area.
5076 The function prologue saves it right above the stack frame. */
5077 type = TREE_TYPE (sav);
5078 t = make_tree (type, frame_pointer_rtx);
5079 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5080 TREE_SIDE_EFFECTS (t) = 1;
5081 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5082 }
5083 }
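
/* For reference, the va_list object manipulated above corresponds to the
   x86-64 psABI record type; the field order matches f_gpr, f_fpr, f_ovf
   and f_sav used here.  Shown only as an illustration:

       typedef struct {
         unsigned int gp_offset;        counter into the GP save area (gpr)
         unsigned int fp_offset;        counter into the SSE save area (fpr)
         void *overflow_arg_area;       next argument passed on the stack (ovf)
         void *reg_save_area;           start of the register save area (sav)
       } __builtin_va_list[1];

   so after va_start, gp_offset == n_gpr * 8 and fp_offset == n_fpr * 16
   + 8 * REGPARM_MAX, exactly as computed above.  */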
5084
5085 /* Implement va_arg. */
5086
5087 static tree
5088 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5089 {
5090 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5091 tree f_gpr, f_fpr, f_ovf, f_sav;
5092 tree gpr, fpr, ovf, sav, t;
5093 int size, rsize;
5094 tree lab_false, lab_over = NULL_TREE;
5095 tree addr, t2;
5096 rtx container;
5097 int indirect_p = 0;
5098 tree ptrtype;
5099 enum machine_mode nat_mode;
5100
5101 /* Only the 64-bit target needs anything special. */
5102 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5103 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5104
5105 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5106 f_fpr = TREE_CHAIN (f_gpr);
5107 f_ovf = TREE_CHAIN (f_fpr);
5108 f_sav = TREE_CHAIN (f_ovf);
5109
5110 valist = build_va_arg_indirect_ref (valist);
5111 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5112 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5113 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5114 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5115
5116 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5117 if (indirect_p)
5118 type = build_pointer_type (type);
5119 size = int_size_in_bytes (type);
5120 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5121
5122 nat_mode = type_natural_mode (type);
5123 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5124 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5125
5126 /* Pull the value out of the saved registers. */
5127
5128 addr = create_tmp_var (ptr_type_node, "addr");
5129 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5130
5131 if (container)
5132 {
5133 int needed_intregs, needed_sseregs;
5134 bool need_temp;
5135 tree int_addr, sse_addr;
5136
5137 lab_false = create_artificial_label ();
5138 lab_over = create_artificial_label ();
5139
5140 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5141
5142 need_temp = (!REG_P (container)
5143 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5144 || TYPE_ALIGN (type) > 128));
5145
5146 /* In case we are passing a structure, verify that it is a consecutive block
5147 in the register save area. If not, we need to do moves. */
5148 if (!need_temp && !REG_P (container))
5149 {
5150 /* Verify that all registers are strictly consecutive. */
5151 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5152 {
5153 int i;
5154
5155 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5156 {
5157 rtx slot = XVECEXP (container, 0, i);
5158 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5159 || INTVAL (XEXP (slot, 1)) != i * 16)
5160 need_temp = 1;
5161 }
5162 }
5163 else
5164 {
5165 int i;
5166
5167 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5168 {
5169 rtx slot = XVECEXP (container, 0, i);
5170 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5171 || INTVAL (XEXP (slot, 1)) != i * 8)
5172 need_temp = 1;
5173 }
5174 }
5175 }
5176 if (!need_temp)
5177 {
5178 int_addr = addr;
5179 sse_addr = addr;
5180 }
5181 else
5182 {
5183 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5184 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5185 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5186 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5187 }
5188
5189 /* First ensure that we fit completely in registers. */
5190 if (needed_intregs)
5191 {
5192 t = build_int_cst (TREE_TYPE (gpr),
5193 (REGPARM_MAX - needed_intregs + 1) * 8);
5194 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5195 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5196 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5197 gimplify_and_add (t, pre_p);
5198 }
5199 if (needed_sseregs)
5200 {
5201 t = build_int_cst (TREE_TYPE (fpr),
5202 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5203 + REGPARM_MAX * 8);
5204 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5205 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5206 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5207 gimplify_and_add (t, pre_p);
5208 }
5209
5210 /* Compute index to start of area used for integer regs. */
5211 if (needed_intregs)
5212 {
5213 /* int_addr = gpr + sav; */
5214 t = fold_convert (sizetype, gpr);
5215 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5216 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5217 gimplify_and_add (t, pre_p);
5218 }
5219 if (needed_sseregs)
5220 {
5221 /* sse_addr = fpr + sav; */
5222 t = fold_convert (sizetype, fpr);
5223 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5224 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5225 gimplify_and_add (t, pre_p);
5226 }
5227 if (need_temp)
5228 {
5229 int i;
5230 tree temp = create_tmp_var (type, "va_arg_tmp");
5231
5232 /* addr = &temp; */
5233 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5234 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5235 gimplify_and_add (t, pre_p);
5236
5237 for (i = 0; i < XVECLEN (container, 0); i++)
5238 {
5239 rtx slot = XVECEXP (container, 0, i);
5240 rtx reg = XEXP (slot, 0);
5241 enum machine_mode mode = GET_MODE (reg);
5242 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5243 tree addr_type = build_pointer_type (piece_type);
5244 tree src_addr, src;
5245 int src_offset;
5246 tree dest_addr, dest;
5247
5248 if (SSE_REGNO_P (REGNO (reg)))
5249 {
5250 src_addr = sse_addr;
5251 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5252 }
5253 else
5254 {
5255 src_addr = int_addr;
5256 src_offset = REGNO (reg) * 8;
5257 }
5258 src_addr = fold_convert (addr_type, src_addr);
5259 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5260 size_int (src_offset));
5261 src = build_va_arg_indirect_ref (src_addr);
5262
5263 dest_addr = fold_convert (addr_type, addr);
5264 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5265 size_int (INTVAL (XEXP (slot, 1))));
5266 dest = build_va_arg_indirect_ref (dest_addr);
5267
5268 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5269 gimplify_and_add (t, pre_p);
5270 }
5271 }
5272
5273 if (needed_intregs)
5274 {
5275 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5276 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5277 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5278 gimplify_and_add (t, pre_p);
5279 }
5280 if (needed_sseregs)
5281 {
5282 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5283 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5284 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5285 gimplify_and_add (t, pre_p);
5286 }
5287
5288 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5289 gimplify_and_add (t, pre_p);
5290
5291 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5292 append_to_statement_list (t, pre_p);
5293 }
5294
5295 /* ... otherwise out of the overflow area. */
5296
5297 /* Care for on-stack alignment if needed. */
5298 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5299 || integer_zerop (TYPE_SIZE (type)))
5300 t = ovf;
5301 else
5302 {
5303 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5304 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5305 size_int (align - 1));
5306 t = fold_convert (sizetype, t);
5307 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5308 size_int (-align));
5309 t = fold_convert (TREE_TYPE (ovf), t);
5310 }
5311 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5312
5313 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5314 gimplify_and_add (t2, pre_p);
5315
5316 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5317 size_int (rsize * UNITS_PER_WORD));
5318 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5319 gimplify_and_add (t, pre_p);
5320
5321 if (container)
5322 {
5323 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5324 append_to_statement_list (t, pre_p);
5325 }
5326
5327 ptrtype = build_pointer_type (type);
5328 addr = fold_convert (ptrtype, addr);
5329
5330 if (indirect_p)
5331 addr = build_va_arg_indirect_ref (addr);
5332 return build_va_arg_indirect_ref (addr);
5333 }
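
/* As a rough illustration only (nothing below is emitted literally), for a
   plain "int" argument the GIMPLE built above amounts to:

       if (ap->gp_offset >= 48) goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;            aligned first, if required
       ap->overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;

   Aggregates split between GP and SSE registers additionally get copied
   piecewise into a temporary - the need_temp path above.  */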
5334 \f
5335 /* Return nonzero if OPNUM's MEM should be matched
5336 in movabs* patterns. */
5337
5338 int
5339 ix86_check_movabs (rtx insn, int opnum)
5340 {
5341 rtx set, mem;
5342
5343 set = PATTERN (insn);
5344 if (GET_CODE (set) == PARALLEL)
5345 set = XVECEXP (set, 0, 0);
5346 gcc_assert (GET_CODE (set) == SET);
5347 mem = XEXP (set, opnum);
5348 while (GET_CODE (mem) == SUBREG)
5349 mem = SUBREG_REG (mem);
5350 gcc_assert (MEM_P (mem));
5351 return (volatile_ok || !MEM_VOLATILE_P (mem));
5352 }
5353 \f
5354 /* Initialize the table of extra 80387 mathematical constants. */
5355
5356 static void
5357 init_ext_80387_constants (void)
5358 {
5359 static const char * cst[5] =
5360 {
5361 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5362 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5363 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5364 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5365 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5366 };
5367 int i;
5368
5369 for (i = 0; i < 5; i++)
5370 {
5371 real_from_string (&ext_80387_constants_table[i], cst[i]);
5372 /* Ensure each constant is rounded to XFmode precision. */
5373 real_convert (&ext_80387_constants_table[i],
5374 XFmode, &ext_80387_constants_table[i]);
5375 }
5376
5377 ext_80387_constants_init = 1;
5378 }
5379
5380 /* Return true if the constant is something that can be loaded with
5381 a special instruction. */
5382
5383 int
5384 standard_80387_constant_p (rtx x)
5385 {
5386 enum machine_mode mode = GET_MODE (x);
5387
5388 REAL_VALUE_TYPE r;
5389
5390 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5391 return -1;
5392
5393 if (x == CONST0_RTX (mode))
5394 return 1;
5395 if (x == CONST1_RTX (mode))
5396 return 2;
5397
5398 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5399
5400 /* For XFmode constants, try to find a special 80387 instruction when
5401 optimizing for size or on those CPUs that benefit from them. */
5402 if (mode == XFmode
5403 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5404 {
5405 int i;
5406
5407 if (! ext_80387_constants_init)
5408 init_ext_80387_constants ();
5409
5410 for (i = 0; i < 5; i++)
5411 if (real_identical (&r, &ext_80387_constants_table[i]))
5412 return i + 3;
5413 }
5414
5415 /* A load of the constant -0.0 or -1.0 will be split into an
5416 fldz;fchs or fld1;fchs sequence. */
5417 if (real_isnegzero (&r))
5418 return 8;
5419 if (real_identical (&r, &dconstm1))
5420 return 9;
5421
5422 return 0;
5423 }
5424
5425 /* Return the opcode of the special instruction to be used to load
5426 the constant X. */
5427
5428 const char *
5429 standard_80387_constant_opcode (rtx x)
5430 {
5431 switch (standard_80387_constant_p (x))
5432 {
5433 case 1:
5434 return "fldz";
5435 case 2:
5436 return "fld1";
5437 case 3:
5438 return "fldlg2";
5439 case 4:
5440 return "fldln2";
5441 case 5:
5442 return "fldl2e";
5443 case 6:
5444 return "fldl2t";
5445 case 7:
5446 return "fldpi";
5447 case 8:
5448 case 9:
5449 return "#";
5450 default:
5451 gcc_unreachable ();
5452 }
5453 }
5454
5455 /* Return the CONST_DOUBLE representing the 80387 constant that is
5456 loaded by the specified special instruction. The argument IDX
5457 matches the return value from standard_80387_constant_p. */
5458
5459 rtx
5460 standard_80387_constant_rtx (int idx)
5461 {
5462 int i;
5463
5464 if (! ext_80387_constants_init)
5465 init_ext_80387_constants ();
5466
5467 switch (idx)
5468 {
5469 case 3:
5470 case 4:
5471 case 5:
5472 case 6:
5473 case 7:
5474 i = idx - 3;
5475 break;
5476
5477 default:
5478 gcc_unreachable ();
5479 }
5480
5481 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5482 XFmode);
5483 }
5484
5485 /* Return 1 if MODE is a valid mode for SSE. */
5486 static int
5487 standard_sse_mode_p (enum machine_mode mode)
5488 {
5489 switch (mode)
5490 {
5491 case V16QImode:
5492 case V8HImode:
5493 case V4SImode:
5494 case V2DImode:
5495 case V4SFmode:
5496 case V2DFmode:
5497 return 1;
5498
5499 default:
5500 return 0;
5501 }
5502 }
5503
5504 /* Return 1 if X is an FP constant we can load into an SSE register
5505 without using memory. */
5506 int
5507 standard_sse_constant_p (rtx x)
5508 {
5509 enum machine_mode mode = GET_MODE (x);
5510
5511 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5512 return 1;
5513 if (vector_all_ones_operand (x, mode)
5514 && standard_sse_mode_p (mode))
5515 return TARGET_SSE2 ? 2 : -1;
5516
5517 return 0;
5518 }
5519
5520 /* Return the opcode of the special instruction to be used to load
5521 the constant X. */
5522
5523 const char *
5524 standard_sse_constant_opcode (rtx insn, rtx x)
5525 {
5526 switch (standard_sse_constant_p (x))
5527 {
5528 case 1:
5529 if (get_attr_mode (insn) == MODE_V4SF)
5530 return "xorps\t%0, %0";
5531 else if (get_attr_mode (insn) == MODE_V2DF)
5532 return "xorpd\t%0, %0";
5533 else
5534 return "pxor\t%0, %0";
5535 case 2:
5536 return "pcmpeqd\t%0, %0";
5537 }
5538 gcc_unreachable ();
5539 }
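
/* A short note on the idioms above: xorps/xorpd/pxor of a register with
   itself yields all zeros without touching memory, and pcmpeqd of a register
   with itself yields all ones (every element compares equal).  The latter is
   an SSE2 instruction on XMM registers, which is why standard_sse_constant_p
   only returns 2 when TARGET_SSE2.  */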
5540
5541 /* Return 1 if OP contains a symbol reference. */
5542
5543 int
5544 symbolic_reference_mentioned_p (rtx op)
5545 {
5546 const char *fmt;
5547 int i;
5548
5549 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5550 return 1;
5551
5552 fmt = GET_RTX_FORMAT (GET_CODE (op));
5553 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5554 {
5555 if (fmt[i] == 'E')
5556 {
5557 int j;
5558
5559 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5560 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5561 return 1;
5562 }
5563
5564 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5565 return 1;
5566 }
5567
5568 return 0;
5569 }
5570
5571 /* Return 1 if it is appropriate to emit `ret' instructions in the
5572 body of a function. Do this only if the epilogue is simple, needing a
5573 couple of insns. Prior to reloading, we can't tell how many registers
5574 must be saved, so return 0 then. Return 0 if there is no frame
5575 marker to de-allocate. */
5576
5577 int
5578 ix86_can_use_return_insn_p (void)
5579 {
5580 struct ix86_frame frame;
5581
5582 if (! reload_completed || frame_pointer_needed)
5583 return 0;
5584
5585 /* Don't allow more than 32K bytes of popped arguments, since that's all
5586 we can do with one instruction. */
5587 if (current_function_pops_args
5588 && current_function_args_size >= 32768)
5589 return 0;
5590
5591 ix86_compute_frame_layout (&frame);
5592 return frame.to_allocate == 0 && frame.nregs == 0;
5593 }
5594 \f
5595 /* Value should be nonzero if functions must have frame pointers.
5596 Zero means the frame pointer need not be set up (and parms may
5597 be accessed via the stack pointer) in functions that seem suitable. */
5598
5599 int
5600 ix86_frame_pointer_required (void)
5601 {
5602 /* If we accessed previous frames, then the generated code expects
5603 to be able to access the saved ebp value in our frame. */
5604 if (cfun->machine->accesses_prev_frame)
5605 return 1;
5606
5607 /* Several x86 OSes need a frame pointer for other reasons,
5608 usually pertaining to setjmp. */
5609 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5610 return 1;
5611
5612 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5613 the frame pointer by default. Turn it back on now if we've not
5614 got a leaf function. */
5615 if (TARGET_OMIT_LEAF_FRAME_POINTER
5616 && (!current_function_is_leaf
5617 || ix86_current_function_calls_tls_descriptor))
5618 return 1;
5619
5620 if (current_function_profile)
5621 return 1;
5622
5623 return 0;
5624 }
5625
5626 /* Record that the current function accesses previous call frames. */
5627
5628 void
5629 ix86_setup_frame_addresses (void)
5630 {
5631 cfun->machine->accesses_prev_frame = 1;
5632 }
5633 \f
5634 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5635 # define USE_HIDDEN_LINKONCE 1
5636 #else
5637 # define USE_HIDDEN_LINKONCE 0
5638 #endif
5639
5640 static int pic_labels_used;
5641
5642 /* Fills in the label name that should be used for a pc thunk for
5643 the given register. */
5644
5645 static void
5646 get_pc_thunk_name (char name[32], unsigned int regno)
5647 {
5648 gcc_assert (!TARGET_64BIT);
5649
5650 if (USE_HIDDEN_LINKONCE)
5651 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5652 else
5653 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5654 }
5655
5656
5657 /* For -fpic, this function emits the pc thunks: each one loads its
5658 register with the return address of the caller and then returns. */
5659
5660 void
5661 ix86_file_end (void)
5662 {
5663 rtx xops[2];
5664 int regno;
5665
5666 for (regno = 0; regno < 8; ++regno)
5667 {
5668 char name[32];
5669
5670 if (! ((pic_labels_used >> regno) & 1))
5671 continue;
5672
5673 get_pc_thunk_name (name, regno);
5674
5675 #if TARGET_MACHO
5676 if (TARGET_MACHO)
5677 {
5678 switch_to_section (darwin_sections[text_coal_section]);
5679 fputs ("\t.weak_definition\t", asm_out_file);
5680 assemble_name (asm_out_file, name);
5681 fputs ("\n\t.private_extern\t", asm_out_file);
5682 assemble_name (asm_out_file, name);
5683 fputs ("\n", asm_out_file);
5684 ASM_OUTPUT_LABEL (asm_out_file, name);
5685 }
5686 else
5687 #endif
5688 if (USE_HIDDEN_LINKONCE)
5689 {
5690 tree decl;
5691
5692 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5693 error_mark_node);
5694 TREE_PUBLIC (decl) = 1;
5695 TREE_STATIC (decl) = 1;
5696 DECL_ONE_ONLY (decl) = 1;
5697
5698 (*targetm.asm_out.unique_section) (decl, 0);
5699 switch_to_section (get_named_section (decl, NULL, 0));
5700
5701 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5702 fputs ("\t.hidden\t", asm_out_file);
5703 assemble_name (asm_out_file, name);
5704 fputc ('\n', asm_out_file);
5705 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5706 }
5707 else
5708 {
5709 switch_to_section (text_section);
5710 ASM_OUTPUT_LABEL (asm_out_file, name);
5711 }
5712
5713 xops[0] = gen_rtx_REG (SImode, regno);
5714 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5715 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5716 output_asm_insn ("ret", xops);
5717 }
5718
5719 if (NEED_INDICATE_EXEC_STACK)
5720 file_end_indicate_exec_stack ();
5721 }
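
/* For illustration, the thunk emitted above for %ebx looks roughly like
   this (with the USE_HIDDEN_LINKONCE naming):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it loads the caller's return address - the address of the insn
   following the call to the thunk - into the PIC register.  */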
5722
5723 /* Emit code for the SET_GOT patterns. */
5724
5725 const char *
5726 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5727 {
5728 rtx xops[3];
5729
5730 xops[0] = dest;
5731
5732 if (TARGET_VXWORKS_RTP && flag_pic)
5733 {
5734 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5735 xops[2] = gen_rtx_MEM (Pmode,
5736 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5737 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5738
5739 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5740 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5741 an unadorned address. */
5742 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5743 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5744 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5745 return "";
5746 }
5747
5748 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5749
5750 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5751 {
5752 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5753
5754 if (!flag_pic)
5755 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5756 else
5757 output_asm_insn ("call\t%a2", xops);
5758
5759 #if TARGET_MACHO
5760 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5761 is what will be referenced by the Mach-O PIC subsystem. */
5762 if (!label)
5763 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5764 #endif
5765
5766 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5767 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5768
5769 if (flag_pic)
5770 output_asm_insn ("pop{l}\t%0", xops);
5771 }
5772 else
5773 {
5774 char name[32];
5775 get_pc_thunk_name (name, REGNO (dest));
5776 pic_labels_used |= 1 << REGNO (dest);
5777
5778 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5779 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5780 output_asm_insn ("call\t%X2", xops);
5781 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5782 is what will be referenced by the Mach-O PIC subsystem. */
5783 #if TARGET_MACHO
5784 if (!label)
5785 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5786 else
5787 targetm.asm_out.internal_label (asm_out_file, "L",
5788 CODE_LABEL_NUMBER (label));
5789 #endif
5790 }
5791
5792 if (TARGET_MACHO)
5793 return "";
5794
5795 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5796 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5797 else
5798 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5799
5800 return "";
5801 }
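
/* As a rough example, with deep branch prediction the expansion above
   produces, for %ebx:

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   and without it the inline form:

       call    .L2
   .L2: popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   Label names here are illustrative only.  */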
5802
5803 /* Generate a "push" pattern for input ARG. */
5804
5805 static rtx
5806 gen_push (rtx arg)
5807 {
5808 return gen_rtx_SET (VOIDmode,
5809 gen_rtx_MEM (Pmode,
5810 gen_rtx_PRE_DEC (Pmode,
5811 stack_pointer_rtx)),
5812 arg);
5813 }
5814
5815 /* Return >= 0 if there is an unused call-clobbered register available
5816 for the entire function. */
5817
5818 static unsigned int
5819 ix86_select_alt_pic_regnum (void)
5820 {
5821 if (current_function_is_leaf && !current_function_profile
5822 && !ix86_current_function_calls_tls_descriptor)
5823 {
5824 int i;
5825 for (i = 2; i >= 0; --i)
5826 if (!df_regs_ever_live_p (i))
5827 return i;
5828 }
5829
5830 return INVALID_REGNUM;
5831 }
5832
5833 /* Return 1 if we need to save REGNO. */
5834 static int
5835 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5836 {
5837 if (pic_offset_table_rtx
5838 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5839 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5840 || current_function_profile
5841 || current_function_calls_eh_return
5842 || current_function_uses_const_pool))
5843 {
5844 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5845 return 0;
5846 return 1;
5847 }
5848
5849 if (current_function_calls_eh_return && maybe_eh_return)
5850 {
5851 unsigned i;
5852 for (i = 0; ; i++)
5853 {
5854 unsigned test = EH_RETURN_DATA_REGNO (i);
5855 if (test == INVALID_REGNUM)
5856 break;
5857 if (test == regno)
5858 return 1;
5859 }
5860 }
5861
5862 if (cfun->machine->force_align_arg_pointer
5863 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5864 return 1;
5865
5866 return (df_regs_ever_live_p (regno)
5867 && !call_used_regs[regno]
5868 && !fixed_regs[regno]
5869 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5870 }
5871
5872 /* Return number of registers to be saved on the stack. */
5873
5874 static int
5875 ix86_nsaved_regs (void)
5876 {
5877 int nregs = 0;
5878 int regno;
5879
5880 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5881 if (ix86_save_reg (regno, true))
5882 nregs++;
5883 return nregs;
5884 }
5885
5886 /* Return the offset between two registers, one to be eliminated, and the other
5887 its replacement, at the start of a routine. */
5888
5889 HOST_WIDE_INT
5890 ix86_initial_elimination_offset (int from, int to)
5891 {
5892 struct ix86_frame frame;
5893 ix86_compute_frame_layout (&frame);
5894
5895 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5896 return frame.hard_frame_pointer_offset;
5897 else if (from == FRAME_POINTER_REGNUM
5898 && to == HARD_FRAME_POINTER_REGNUM)
5899 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5900 else
5901 {
5902 gcc_assert (to == STACK_POINTER_REGNUM);
5903
5904 if (from == ARG_POINTER_REGNUM)
5905 return frame.stack_pointer_offset;
5906
5907 gcc_assert (from == FRAME_POINTER_REGNUM);
5908 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5909 }
5910 }
5911
5912 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
5913
5914 static void
5915 ix86_compute_frame_layout (struct ix86_frame *frame)
5916 {
5917 HOST_WIDE_INT total_size;
5918 unsigned int stack_alignment_needed;
5919 HOST_WIDE_INT offset;
5920 unsigned int preferred_alignment;
5921 HOST_WIDE_INT size = get_frame_size ();
5922
5923 frame->nregs = ix86_nsaved_regs ();
5924 total_size = size;
5925
5926 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5927 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5928
5929 /* During reload iteration the number of registers saved can change.
5930 Recompute the value as needed. Do not recompute when the number of registers
5931 didn't change, as reload makes multiple calls to this function and does not
5932 expect the decision to change within a single iteration. */
5933 if (!optimize_size
5934 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5935 {
5936 int count = frame->nregs;
5937
5938 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5939 /* The fast prologue uses move instead of push to save registers. This
5940 is significantly longer, but also executes faster as modern hardware
5941 can execute the moves in parallel, but can't do that for push/pop.
5942 
5943 Be careful about choosing which prologue to emit: when the function takes
5944 many instructions to execute we may as well use the slow version, and
5945 likewise when the function is known to be outside a hot spot (this is known
5946 with feedback only). Weight the size of the function by the number of
5947 registers to save, as it is cheap to use one or two push instructions but
5948 very slow to use many of them. */
5949 if (count)
5950 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5951 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5952 || (flag_branch_probabilities
5953 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5954 cfun->machine->use_fast_prologue_epilogue = false;
5955 else
5956 cfun->machine->use_fast_prologue_epilogue
5957 = !expensive_function_p (count);
5958 }
5959 if (TARGET_PROLOGUE_USING_MOVE
5960 && cfun->machine->use_fast_prologue_epilogue)
5961 frame->save_regs_using_mov = true;
5962 else
5963 frame->save_regs_using_mov = false;
5964
5965
5966 /* Skip return address and saved base pointer. */
5967 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5968
5969 frame->hard_frame_pointer_offset = offset;
5970
5971 /* Do some sanity checking of stack_alignment_needed and
5972 preferred_alignment, since the i386 port is the only one using these
5973 features, and they may break easily. */
5974
5975 gcc_assert (!size || stack_alignment_needed);
5976 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5977 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5978 gcc_assert (stack_alignment_needed
5979 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5980
5981 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5982 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5983
5984 /* Register save area */
5985 offset += frame->nregs * UNITS_PER_WORD;
5986
5987 /* Va-arg area */
5988 if (ix86_save_varrargs_registers)
5989 {
5990 offset += X86_64_VARARGS_SIZE;
5991 frame->va_arg_size = X86_64_VARARGS_SIZE;
5992 }
5993 else
5994 frame->va_arg_size = 0;
5995
5996 /* Align start of frame for local function. */
5997 frame->padding1 = ((offset + stack_alignment_needed - 1)
5998 & -stack_alignment_needed) - offset;
5999
6000 offset += frame->padding1;
6001
6002 /* Frame pointer points here. */
6003 frame->frame_pointer_offset = offset;
6004
6005 offset += size;
6006
6007 /* Add the outgoing arguments area. It can be skipped if we eliminated
6008 all the function calls as dead code.
6009 Skipping is however impossible when the function calls alloca, because the
6010 alloca expander assumes that the last current_function_outgoing_args_size
6011 bytes of the stack frame are unused. */
6012 if (ACCUMULATE_OUTGOING_ARGS
6013 && (!current_function_is_leaf || current_function_calls_alloca
6014 || ix86_current_function_calls_tls_descriptor))
6015 {
6016 offset += current_function_outgoing_args_size;
6017 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6018 }
6019 else
6020 frame->outgoing_arguments_size = 0;
6021
6022 /* Align stack boundary. Only needed if we're calling another function
6023 or using alloca. */
6024 if (!current_function_is_leaf || current_function_calls_alloca
6025 || ix86_current_function_calls_tls_descriptor)
6026 frame->padding2 = ((offset + preferred_alignment - 1)
6027 & -preferred_alignment) - offset;
6028 else
6029 frame->padding2 = 0;
6030
6031 offset += frame->padding2;
6032
6033 /* We've reached end of stack frame. */
6034 frame->stack_pointer_offset = offset;
6035
6036 /* Size the prologue needs to allocate. */
6037 frame->to_allocate =
6038 (size + frame->padding1 + frame->padding2
6039 + frame->outgoing_arguments_size + frame->va_arg_size);
6040
6041 if ((!frame->to_allocate && frame->nregs <= 1)
6042 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6043 frame->save_regs_using_mov = false;
6044
6045 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6046 && current_function_is_leaf
6047 && !ix86_current_function_calls_tls_descriptor)
6048 {
6049 frame->red_zone_size = frame->to_allocate;
6050 if (frame->save_regs_using_mov)
6051 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6052 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6053 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6054 }
6055 else
6056 frame->red_zone_size = 0;
6057 frame->to_allocate -= frame->red_zone_size;
6058 frame->stack_pointer_offset -= frame->red_zone_size;
6059 #if 0
6060 fprintf (stderr, "\n");
6061 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6062 fprintf (stderr, "size: %ld\n", (long)size);
6063 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6064 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6065 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6066 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6067 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6068 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6069 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6070 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6071 (long)frame->hard_frame_pointer_offset);
6072 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6073 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6074 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6075 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6076 #endif
6077 }
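
/* A rough picture of the frame laid out above, from higher to lower
   addresses:

       incoming arguments
       return address
       saved frame pointer (if needed)     <- hard_frame_pointer_offset
       saved registers                        nregs * UNITS_PER_WORD
       va-arg register save area              va_arg_size
       padding1
       local variables                     <- frame_pointer_offset
       outgoing arguments
       padding2                            <- stack_pointer_offset
       red zone (64-bit leaf functions)

   to_allocate is roughly the amount the prologue subtracts from the stack
   pointer after the pushes; with save_regs_using_mov the register area is
   folded into that allocation instead.  */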
6078
6079 /* Emit code to save registers in the prologue. */
6080
6081 static void
6082 ix86_emit_save_regs (void)
6083 {
6084 unsigned int regno;
6085 rtx insn;
6086
6087 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6088 if (ix86_save_reg (regno, true))
6089 {
6090 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6091 RTX_FRAME_RELATED_P (insn) = 1;
6092 }
6093 }
6094
6095 /* Emit code to save registers using MOV insns. The first register
6096 is saved at POINTER + OFFSET. */
6097 static void
6098 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6099 {
6100 unsigned int regno;
6101 rtx insn;
6102
6103 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6104 if (ix86_save_reg (regno, true))
6105 {
6106 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6107 Pmode, offset),
6108 gen_rtx_REG (Pmode, regno));
6109 RTX_FRAME_RELATED_P (insn) = 1;
6110 offset += UNITS_PER_WORD;
6111 }
6112 }
6113
6114 /* Expand prologue or epilogue stack adjustment.
6115 The pattern exists to put a dependency on all ebp-based memory accesses.
6116 STYLE should be negative if instructions should be marked as frame related,
6117 zero if the %r11 register is live and cannot be freely used, and positive
6118 otherwise. */
6119
6120 static void
6121 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6122 {
6123 rtx insn;
6124
6125 if (! TARGET_64BIT)
6126 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6127 else if (x86_64_immediate_operand (offset, DImode))
6128 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6129 else
6130 {
6131 rtx r11;
6132 /* r11 is used by indirect sibcall return as well, set before the
6133 epilogue and used after the epilogue. ATM indirect sibcall
6134 shouldn't be used together with huge frame sizes in one
6135 function because of the frame_size check in sibcall.c. */
6136 gcc_assert (style);
6137 r11 = gen_rtx_REG (DImode, R11_REG);
6138 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6139 if (style < 0)
6140 RTX_FRAME_RELATED_P (insn) = 1;
6141 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6142 offset));
6143 }
6144 if (style < 0)
6145 RTX_FRAME_RELATED_P (insn) = 1;
6146 }
6147
6148 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6149
6150 static rtx
6151 ix86_internal_arg_pointer (void)
6152 {
6153 bool has_force_align_arg_pointer =
6154 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6155 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6156 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6157 && DECL_NAME (current_function_decl)
6158 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6159 && DECL_FILE_SCOPE_P (current_function_decl))
6160 || ix86_force_align_arg_pointer
6161 || has_force_align_arg_pointer)
6162 {
6163 /* Nested functions can't realign the stack due to a register
6164 conflict. */
6165 if (DECL_CONTEXT (current_function_decl)
6166 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6167 {
6168 if (ix86_force_align_arg_pointer)
6169 warning (0, "-mstackrealign ignored for nested functions");
6170 if (has_force_align_arg_pointer)
6171 error ("%s not supported for nested functions",
6172 ix86_force_align_arg_pointer_string);
6173 return virtual_incoming_args_rtx;
6174 }
6175 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
6176 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6177 }
6178 else
6179 return virtual_incoming_args_rtx;
6180 }
6181
6182 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6183 This is called from dwarf2out.c to emit call frame instructions
6184 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6185 static void
6186 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6187 {
6188 rtx unspec = SET_SRC (pattern);
6189 gcc_assert (GET_CODE (unspec) == UNSPEC);
6190
6191 switch (index)
6192 {
6193 case UNSPEC_REG_SAVE:
6194 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6195 SET_DEST (pattern));
6196 break;
6197 case UNSPEC_DEF_CFA:
6198 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6199 INTVAL (XVECEXP (unspec, 0, 0)));
6200 break;
6201 default:
6202 gcc_unreachable ();
6203 }
6204 }
6205
6206 /* Expand the prologue into a bunch of separate insns. */
6207
6208 void
6209 ix86_expand_prologue (void)
6210 {
6211 rtx insn;
6212 bool pic_reg_used;
6213 struct ix86_frame frame;
6214 HOST_WIDE_INT allocate;
6215
6216 ix86_compute_frame_layout (&frame);
6217
6218 if (cfun->machine->force_align_arg_pointer)
6219 {
6220 rtx x, y;
6221
6222 /* Grab the argument pointer. */
6223 x = plus_constant (stack_pointer_rtx, 4);
6224 y = cfun->machine->force_align_arg_pointer;
6225 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6226 RTX_FRAME_RELATED_P (insn) = 1;
6227
6228 /* The unwind info consists of two parts: install the fafp as the cfa,
6229 and record the fafp as the "save register" of the stack pointer.
6230 The latter is there in order that the unwinder can see where it
6231 should restore the stack pointer across the "and" insn. */
6232 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6233 x = gen_rtx_SET (VOIDmode, y, x);
6234 RTX_FRAME_RELATED_P (x) = 1;
6235 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6236 UNSPEC_REG_SAVE);
6237 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6238 RTX_FRAME_RELATED_P (y) = 1;
6239 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6240 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6241 REG_NOTES (insn) = x;
6242
6243 /* Align the stack. */
6244 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6245 GEN_INT (-16)));
6246
6247 /* And here we cheat like madmen with the unwind info. We force the
6248 cfa register back to sp+4, which is exactly what it was at the
6249 start of the function. Re-pushing the return address results in
6250 the return at the same spot relative to the cfa, and thus is
6251 correct wrt the unwind info. */
6252 x = cfun->machine->force_align_arg_pointer;
6253 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6254 insn = emit_insn (gen_push (x));
6255 RTX_FRAME_RELATED_P (insn) = 1;
6256
6257 x = GEN_INT (4);
6258 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6259 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6260 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6261 REG_NOTES (insn) = x;
6262 }
6263
6264 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6265 slower on all targets. Also sdb doesn't like it. */
6266
6267 if (frame_pointer_needed)
6268 {
6269 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6270 RTX_FRAME_RELATED_P (insn) = 1;
6271
6272 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6273 RTX_FRAME_RELATED_P (insn) = 1;
6274 }
6275
6276 allocate = frame.to_allocate;
6277
6278 if (!frame.save_regs_using_mov)
6279 ix86_emit_save_regs ();
6280 else
6281 allocate += frame.nregs * UNITS_PER_WORD;
6282
6283 /* When using the red zone we may start register saving before allocating
6284 the stack frame, saving one cycle of the prologue. */
6285 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6286 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6287 : stack_pointer_rtx,
6288 -frame.nregs * UNITS_PER_WORD);
6289
6290 if (allocate == 0)
6291 ;
6292 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6293 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6294 GEN_INT (-allocate), -1);
6295 else
6296 {
6297 /* Only valid for Win32. */
6298 rtx eax = gen_rtx_REG (Pmode, 0);
6299 bool eax_live;
6300 rtx t;
6301
6302 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6303
6304 if (TARGET_64BIT_MS_ABI)
6305 eax_live = false;
6306 else
6307 eax_live = ix86_eax_live_at_start_p ();
6308
6309 if (eax_live)
6310 {
6311 emit_insn (gen_push (eax));
6312 allocate -= UNITS_PER_WORD;
6313 }
6314
6315 emit_move_insn (eax, GEN_INT (allocate));
6316
6317 if (TARGET_64BIT)
6318 insn = gen_allocate_stack_worker_64 (eax);
6319 else
6320 insn = gen_allocate_stack_worker_32 (eax);
6321 insn = emit_insn (insn);
6322 RTX_FRAME_RELATED_P (insn) = 1;
6323 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6324 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6325 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6326 t, REG_NOTES (insn));
6327
6328 if (eax_live)
6329 {
6330 if (frame_pointer_needed)
6331 t = plus_constant (hard_frame_pointer_rtx,
6332 allocate
6333 - frame.to_allocate
6334 - frame.nregs * UNITS_PER_WORD);
6335 else
6336 t = plus_constant (stack_pointer_rtx, allocate);
6337 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6338 }
6339 }
6340
6341 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6342 {
6343 if (!frame_pointer_needed || !frame.to_allocate)
6344 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6345 else
6346 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6347 -frame.nregs * UNITS_PER_WORD);
6348 }
6349
6350 pic_reg_used = false;
6351 if (pic_offset_table_rtx
6352 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6353 || current_function_profile))
6354 {
6355 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6356
6357 if (alt_pic_reg_used != INVALID_REGNUM)
6358 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6359
6360 pic_reg_used = true;
6361 }
6362
6363 if (pic_reg_used)
6364 {
6365 if (TARGET_64BIT)
6366 {
6367 if (ix86_cmodel == CM_LARGE_PIC)
6368 {
6369 rtx tmp_reg = gen_rtx_REG (DImode,
6370 FIRST_REX_INT_REG + 3 /* R11 */);
6371 rtx label = gen_label_rtx ();
6372 emit_label (label);
6373 LABEL_PRESERVE_P (label) = 1;
6374 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6375 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6376 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6377 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6378 pic_offset_table_rtx, tmp_reg));
6379 }
6380 else
6381 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6382 }
6383 else
6384 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6385 }
6386
6387 /* Prevent function calls from being scheduled before the call to mcount.
6388 In the pic_reg_used case, make sure that the got load isn't deleted. */
6389 if (current_function_profile)
6390 {
6391 if (pic_reg_used)
6392 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6393 emit_insn (gen_blockage ());
6394 }
6395 }
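
/* For a typical 32-bit function with a frame pointer and no special cases,
   the expansion above boils down to something like:

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx                        saved registers (or moves, when
       ...                                 save_regs_using_mov)
       subl    $N, %esp                    N == frame.to_allocate
       call    __i686.get_pc_thunk.bx      only when the PIC register is set up
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   This is only an illustration; the exact sequence depends on the frame
   layout and tuning decisions computed above.  */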
6396
6397 /* Emit code to restore saved registers using MOV insns. First register
6398 is restored from POINTER + OFFSET. */
6399 static void
6400 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6401 int maybe_eh_return)
6402 {
6403 int regno;
6404 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6405
6406 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6407 if (ix86_save_reg (regno, maybe_eh_return))
6408 {
6409 /* Ensure that adjust_address won't be forced to produce a pointer
6410 out of the range allowed by the x86-64 instruction set. */
6411 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6412 {
6413 rtx r11;
6414
6415 r11 = gen_rtx_REG (DImode, R11_REG);
6416 emit_move_insn (r11, GEN_INT (offset));
6417 emit_insn (gen_adddi3 (r11, r11, pointer));
6418 base_address = gen_rtx_MEM (Pmode, r11);
6419 offset = 0;
6420 }
6421 emit_move_insn (gen_rtx_REG (Pmode, regno),
6422 adjust_address (base_address, Pmode, offset));
6423 offset += UNITS_PER_WORD;
6424 }
6425 }
6426
6427 /* Restore function stack, frame, and registers. */
6428
6429 void
6430 ix86_expand_epilogue (int style)
6431 {
6432 int regno;
6433 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6434 struct ix86_frame frame;
6435 HOST_WIDE_INT offset;
6436
6437 ix86_compute_frame_layout (&frame);
6438
6439 /* Calculate start of saved registers relative to ebp. Special care
6440 must be taken for the normal return case of a function using
6441 eh_return: the eax and edx registers are marked as saved, but not
6442 restored along this path. */
6443 offset = frame.nregs;
6444 if (current_function_calls_eh_return && style != 2)
6445 offset -= 2;
6446 offset *= -UNITS_PER_WORD;
6447
6448 /* If we're only restoring one register and sp is not valid then
6449 use a move instruction to restore the register, since it's
6450 less work than reloading sp and popping the register.
6451 
6452 The default code results in a stack adjustment using an add/lea instruction,
6453 while this code results in a LEAVE instruction (or discrete equivalent),
6454 so it is profitable in some other cases as well, especially when there
6455 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6456 and there is exactly one register to pop. This heuristic may need some
6457 tuning in the future. */
6458 if ((!sp_valid && frame.nregs <= 1)
6459 || (TARGET_EPILOGUE_USING_MOVE
6460 && cfun->machine->use_fast_prologue_epilogue
6461 && (frame.nregs > 1 || frame.to_allocate))
6462 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6463 || (frame_pointer_needed && TARGET_USE_LEAVE
6464 && cfun->machine->use_fast_prologue_epilogue
6465 && frame.nregs == 1)
6466 || current_function_calls_eh_return)
6467 {
6468 /* Restore registers. We can use ebp or esp to address the memory
6469 locations. If both are available, default to ebp, since offsets
6470 are known to be small. The only exception is esp pointing directly to the
6471 end of the block of saved registers, where we may simplify the addressing
6472 mode. */
6473
6474 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6475 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6476 frame.to_allocate, style == 2);
6477 else
6478 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6479 offset, style == 2);
6480
6481 /* eh_return epilogues need %ecx added to the stack pointer. */
6482 if (style == 2)
6483 {
6484 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6485
6486 if (frame_pointer_needed)
6487 {
6488 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6489 tmp = plus_constant (tmp, UNITS_PER_WORD);
6490 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6491
6492 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6493 emit_move_insn (hard_frame_pointer_rtx, tmp);
6494
6495 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6496 const0_rtx, style);
6497 }
6498 else
6499 {
6500 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6501 tmp = plus_constant (tmp, (frame.to_allocate
6502 + frame.nregs * UNITS_PER_WORD));
6503 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6504 }
6505 }
6506 else if (!frame_pointer_needed)
6507 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6508 GEN_INT (frame.to_allocate
6509 + frame.nregs * UNITS_PER_WORD),
6510 style);
6511 /* If not an i386, mov & pop is faster than "leave". */
6512 else if (TARGET_USE_LEAVE || optimize_size
6513 || !cfun->machine->use_fast_prologue_epilogue)
6514 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6515 else
6516 {
6517 pro_epilogue_adjust_stack (stack_pointer_rtx,
6518 hard_frame_pointer_rtx,
6519 const0_rtx, style);
6520 if (TARGET_64BIT)
6521 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6522 else
6523 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6524 }
6525 }
6526 else
6527 {
6528 /* First step is to deallocate the stack frame so that we can
6529 pop the registers. */
6530 if (!sp_valid)
6531 {
6532 gcc_assert (frame_pointer_needed);
6533 pro_epilogue_adjust_stack (stack_pointer_rtx,
6534 hard_frame_pointer_rtx,
6535 GEN_INT (offset), style);
6536 }
6537 else if (frame.to_allocate)
6538 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6539 GEN_INT (frame.to_allocate), style);
6540
6541 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6542 if (ix86_save_reg (regno, false))
6543 {
6544 if (TARGET_64BIT)
6545 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6546 else
6547 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6548 }
6549 if (frame_pointer_needed)
6550 {
6551 /* Leave results in shorter dependency chains on CPUs that are
6552 able to grok it fast. */
6553 if (TARGET_USE_LEAVE)
6554 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6555 else if (TARGET_64BIT)
6556 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6557 else
6558 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6559 }
6560 }
6561
6562 if (cfun->machine->force_align_arg_pointer)
6563 {
6564 emit_insn (gen_addsi3 (stack_pointer_rtx,
6565 cfun->machine->force_align_arg_pointer,
6566 GEN_INT (-4)));
6567 }
6568
6569 /* Sibcall epilogues don't want a return instruction. */
6570 if (style == 0)
6571 return;
6572
6573 if (current_function_pops_args && current_function_args_size)
6574 {
6575 rtx popc = GEN_INT (current_function_pops_args);
6576
6577 /* i386 can only pop 64K bytes. If asked to pop more, pop
6578 return address, do explicit add, and jump indirectly to the
6579 caller. */
6580
6581 if (current_function_pops_args >= 65536)
6582 {
6583 rtx ecx = gen_rtx_REG (SImode, 2);
6584
6585 /* There is no "pascal" calling convention in any 64bit ABI. */
6586 gcc_assert (!TARGET_64BIT);
6587
6588 emit_insn (gen_popsi1 (ecx));
6589 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6590 emit_jump_insn (gen_return_indirect_internal (ecx));
6591 }
6592 else
6593 emit_jump_insn (gen_return_pop_internal (popc));
6594 }
6595 else
6596 emit_jump_insn (gen_return_internal ());
6597 }
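
/* Correspondingly, a typical epilogue emitted above is either

       leave                               or:  movl %ebp, %esp; popl %ebp
       ret

   when a frame pointer is used, or

       addl    $N, %esp
       popl    %ebx                        restored registers
       ret     $M                          M only for callee-pop conventions

   otherwise.  Again, this is an illustration rather than the literal output
   in every case.  */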
6598
6599 /* Reset from the function's potential modifications. */
6600
6601 static void
6602 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6603 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6604 {
6605 if (pic_offset_table_rtx)
6606 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6607 #if TARGET_MACHO
6608 /* Mach-O doesn't support labels at the end of objects, so if
6609 it looks like we might want one, insert a NOP. */
6610 {
6611 rtx insn = get_last_insn ();
6612 while (insn
6613 && NOTE_P (insn)
6614 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6615 insn = PREV_INSN (insn);
6616 if (insn
6617 && (LABEL_P (insn)
6618 || (NOTE_P (insn)
6619 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6620 fputs ("\tnop\n", file);
6621 }
6622 #endif
6623
6624 }
6625 \f
6626 /* Extract the parts of an RTL expression that is a valid memory address
6627 for an instruction. Return 0 if the structure of the address is
6628 grossly off. Return -1 if the address contains ASHIFT, so it is not
6629 strictly valid, but is still used for computing the length of a lea instruction. */
6630
6631 int
6632 ix86_decompose_address (rtx addr, struct ix86_address *out)
6633 {
6634 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6635 rtx base_reg, index_reg;
6636 HOST_WIDE_INT scale = 1;
6637 rtx scale_rtx = NULL_RTX;
6638 int retval = 1;
6639 enum ix86_address_seg seg = SEG_DEFAULT;
6640
6641 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6642 base = addr;
6643 else if (GET_CODE (addr) == PLUS)
6644 {
6645 rtx addends[4], op;
6646 int n = 0, i;
6647
6648 op = addr;
6649 do
6650 {
6651 if (n >= 4)
6652 return 0;
6653 addends[n++] = XEXP (op, 1);
6654 op = XEXP (op, 0);
6655 }
6656 while (GET_CODE (op) == PLUS);
6657 if (n >= 4)
6658 return 0;
6659 addends[n] = op;
6660
6661 for (i = n; i >= 0; --i)
6662 {
6663 op = addends[i];
6664 switch (GET_CODE (op))
6665 {
6666 case MULT:
6667 if (index)
6668 return 0;
6669 index = XEXP (op, 0);
6670 scale_rtx = XEXP (op, 1);
6671 break;
6672
6673 case UNSPEC:
6674 if (XINT (op, 1) == UNSPEC_TP
6675 && TARGET_TLS_DIRECT_SEG_REFS
6676 && seg == SEG_DEFAULT)
6677 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6678 else
6679 return 0;
6680 break;
6681
6682 case REG:
6683 case SUBREG:
6684 if (!base)
6685 base = op;
6686 else if (!index)
6687 index = op;
6688 else
6689 return 0;
6690 break;
6691
6692 case CONST:
6693 case CONST_INT:
6694 case SYMBOL_REF:
6695 case LABEL_REF:
6696 if (disp)
6697 return 0;
6698 disp = op;
6699 break;
6700
6701 default:
6702 return 0;
6703 }
6704 }
6705 }
6706 else if (GET_CODE (addr) == MULT)
6707 {
6708 index = XEXP (addr, 0); /* index*scale */
6709 scale_rtx = XEXP (addr, 1);
6710 }
6711 else if (GET_CODE (addr) == ASHIFT)
6712 {
6713 rtx tmp;
6714
6715 /* We're called for lea too, which implements ashift on occasion. */
6716 index = XEXP (addr, 0);
6717 tmp = XEXP (addr, 1);
6718 if (!CONST_INT_P (tmp))
6719 return 0;
6720 scale = INTVAL (tmp);
6721 if ((unsigned HOST_WIDE_INT) scale > 3)
6722 return 0;
6723 scale = 1 << scale;
6724 retval = -1;
6725 }
6726 else
6727 disp = addr; /* displacement */
6728
6729 /* Extract the integral value of scale. */
6730 if (scale_rtx)
6731 {
6732 if (!CONST_INT_P (scale_rtx))
6733 return 0;
6734 scale = INTVAL (scale_rtx);
6735 }
6736
6737 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6738 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6739
6740 /* Allow the arg pointer and the stack pointer as index if there is no scaling. */
6741 if (base_reg && index_reg && scale == 1
6742 && (index_reg == arg_pointer_rtx
6743 || index_reg == frame_pointer_rtx
6744 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6745 {
6746 rtx tmp;
6747 tmp = base, base = index, index = tmp;
6748 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6749 }
6750
6751 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6752 if ((base_reg == hard_frame_pointer_rtx
6753 || base_reg == frame_pointer_rtx
6754 || base_reg == arg_pointer_rtx) && !disp)
6755 disp = const0_rtx;
6756
6757 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6758 Avoid this by transforming it to [%esi+0]. */
6759 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6760 && base_reg && !index_reg && !disp
6761 && REG_P (base_reg)
6762 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6763 disp = const0_rtx;
6764
6765 /* Special case: encode reg+reg instead of reg*2. */
6766 if (!base && index && scale && scale == 2)
6767 base = index, base_reg = index_reg, scale = 1;
6768
6769 /* Special case: scaling cannot be encoded without base or displacement. */
6770 if (!base && !disp && index && scale != 1)
6771 disp = const0_rtx;
6772
6773 out->base = base;
6774 out->index = index;
6775 out->disp = disp;
6776 out->scale = scale;
6777 out->seg = seg;
6778
6779 return retval;
6780 }
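
/* A few illustrative decompositions (GAS operand syntax on the right):

       (plus (reg %ebx) (const_int 8))
           -> base=%ebx disp=8                          8(%ebx)
       (plus (mult (reg %esi) (const_int 4)) (reg %ebx))
           -> base=%ebx index=%esi scale=4              (%ebx,%esi,4)
       (mult (reg %esi) (const_int 2))
           -> base=%esi index=%esi scale=1              (%esi,%esi)
              (the reg+reg special case above)

   The ASHIFT form used by some lea patterns is accepted as well, but then
   the function returns -1 rather than 1.  */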
6781 \f
6782 /* Return the cost of the memory address X.
6783 For i386, it is better to use a complex address than let gcc copy
6784 the address into a reg and make a new pseudo. But not if the address
6785 requires two regs - that would mean more pseudos with longer
6786 lifetimes. */
6787 static int
6788 ix86_address_cost (rtx x)
6789 {
6790 struct ix86_address parts;
6791 int cost = 1;
6792 int ok = ix86_decompose_address (x, &parts);
6793
6794 gcc_assert (ok);
6795
6796 if (parts.base && GET_CODE (parts.base) == SUBREG)
6797 parts.base = SUBREG_REG (parts.base);
6798 if (parts.index && GET_CODE (parts.index) == SUBREG)
6799 parts.index = SUBREG_REG (parts.index);
6800
6801 /* Attempt to minimize number of registers in the address. */
6802 if ((parts.base
6803 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6804 || (parts.index
6805 && (!REG_P (parts.index)
6806 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6807 cost++;
6808
6809 if (parts.base
6810 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6811 && parts.index
6812 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6813 && parts.base != parts.index)
6814 cost++;
6815
6816 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6817 since its predecode logic can't detect the length of instructions
6818 and it degenerates to vector decoding. Increase the cost of such
6819 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6820 to split such addresses or even refuse them entirely.
6821
6822 The following addressing modes are affected:
6823 [base+scale*index]
6824 [scale*index+disp]
6825 [base+index]
6826
6827 The first and last case may be avoidable by explicitly coding the zero in
6828 the memory address, but I don't have an AMD-K6 machine handy to check this
6829 theory. */
6830
6831 if (TARGET_K6
6832 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6833 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6834 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6835 cost += 10;
6836
6837 return cost;
6838 }
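/* Illustrative examples of the costs computed above (a sketch derived
   from the logic in ix86_address_cost; the pseudo-vs-hard-register
   distinction depends on the compilation stage):

     (%eax)                      -> 1   hard registers only
     (pseudo)                    -> 2   one pseudo in the address
     (pseudo1 + pseudo2*4)       -> 3   two distinct pseudos
     [base+index] on K6, no disp -> +10 vector-decode penalty above  */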
6839 \f
6840 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6841 this is used to form addresses to local data when -fPIC is in
6842 use. */
6843
6844 static bool
6845 darwin_local_data_pic (rtx disp)
6846 {
6847 if (GET_CODE (disp) == MINUS)
6848 {
6849 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6850 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6851 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6852 {
6853 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6854 if (! strcmp (sym_name, "<pic base>"))
6855 return true;
6856 }
6857 }
6858
6859 return false;
6860 }
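/* Example of an rtx accepted by darwin_local_data_pic (a sketch; the
   symbol name "_foo" is illustrative):

     (minus (symbol_ref "_foo")
            (symbol_ref "<pic base>"))

   i.e. a local symbol expressed as an offset from the per-function
   Mach-O PIC base label.  */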
6861
6862 /* Determine if a given RTX is a valid constant. We already know this
6863 satisfies CONSTANT_P. */
6864
6865 bool
6866 legitimate_constant_p (rtx x)
6867 {
6868 switch (GET_CODE (x))
6869 {
6870 case CONST:
6871 x = XEXP (x, 0);
6872
6873 if (GET_CODE (x) == PLUS)
6874 {
6875 if (!CONST_INT_P (XEXP (x, 1)))
6876 return false;
6877 x = XEXP (x, 0);
6878 }
6879
6880 if (TARGET_MACHO && darwin_local_data_pic (x))
6881 return true;
6882
6883 /* Only some unspecs are valid as "constants". */
6884 if (GET_CODE (x) == UNSPEC)
6885 switch (XINT (x, 1))
6886 {
6887 case UNSPEC_GOT:
6888 case UNSPEC_GOTOFF:
6889 case UNSPEC_PLTOFF:
6890 return TARGET_64BIT;
6891 case UNSPEC_TPOFF:
6892 case UNSPEC_NTPOFF:
6893 x = XVECEXP (x, 0, 0);
6894 return (GET_CODE (x) == SYMBOL_REF
6895 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6896 case UNSPEC_DTPOFF:
6897 x = XVECEXP (x, 0, 0);
6898 return (GET_CODE (x) == SYMBOL_REF
6899 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6900 default:
6901 return false;
6902 }
6903
6904 /* We must have drilled down to a symbol. */
6905 if (GET_CODE (x) == LABEL_REF)
6906 return true;
6907 if (GET_CODE (x) != SYMBOL_REF)
6908 return false;
6909 /* FALLTHRU */
6910
6911 case SYMBOL_REF:
6912 /* TLS symbols are never valid. */
6913 if (SYMBOL_REF_TLS_MODEL (x))
6914 return false;
6915
6916 /* DLLIMPORT symbols are never valid. */
6917 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6918 && SYMBOL_REF_DLLIMPORT_P (x))
6919 return false;
6920 break;
6921
6922 case CONST_DOUBLE:
6923 if (GET_MODE (x) == TImode
6924 && x != CONST0_RTX (TImode)
6925 && !TARGET_64BIT)
6926 return false;
6927 break;
6928
6929 case CONST_VECTOR:
6930 if (x == CONST0_RTX (GET_MODE (x)))
6931 return true;
6932 return false;
6933
6934 default:
6935 break;
6936 }
6937
6938 /* Otherwise we handle everything else in the move patterns. */
6939 return true;
6940 }
6941
6942 /* Determine if it's legal to put X into the constant pool. This
6943 is not possible for the address of thread-local symbols, which
6944 is checked above. */
6945
6946 static bool
6947 ix86_cannot_force_const_mem (rtx x)
6948 {
6949 /* We can always put integral constants and vectors in memory. */
6950 switch (GET_CODE (x))
6951 {
6952 case CONST_INT:
6953 case CONST_DOUBLE:
6954 case CONST_VECTOR:
6955 return false;
6956
6957 default:
6958 break;
6959 }
6960 return !legitimate_constant_p (x);
6961 }
6962
6963 /* Determine if a given RTX is a valid constant address. */
6964
6965 bool
6966 constant_address_p (rtx x)
6967 {
6968 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6969 }
6970
6971 /* Nonzero if the constant value X is a legitimate general operand
6972 when generating PIC code. It is given that flag_pic is on and
6973 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6974
6975 bool
6976 legitimate_pic_operand_p (rtx x)
6977 {
6978 rtx inner;
6979
6980 switch (GET_CODE (x))
6981 {
6982 case CONST:
6983 inner = XEXP (x, 0);
6984 if (GET_CODE (inner) == PLUS
6985 && CONST_INT_P (XEXP (inner, 1)))
6986 inner = XEXP (inner, 0);
6987
6988 /* Only some unspecs are valid as "constants". */
6989 if (GET_CODE (inner) == UNSPEC)
6990 switch (XINT (inner, 1))
6991 {
6992 case UNSPEC_GOT:
6993 case UNSPEC_GOTOFF:
6994 case UNSPEC_PLTOFF:
6995 return TARGET_64BIT;
6996 case UNSPEC_TPOFF:
6997 x = XVECEXP (inner, 0, 0);
6998 return (GET_CODE (x) == SYMBOL_REF
6999 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7000 default:
7001 return false;
7002 }
7003 /* FALLTHRU */
7004
7005 case SYMBOL_REF:
7006 case LABEL_REF:
7007 return legitimate_pic_address_disp_p (x);
7008
7009 default:
7010 return true;
7011 }
7012 }
7013
7014 /* Determine if a given CONST RTX is a valid memory displacement
7015 in PIC mode. */
7016
7017 int
7018 legitimate_pic_address_disp_p (rtx disp)
7019 {
7020 bool saw_plus;
7021
7022 /* In 64bit mode we can allow direct addresses of symbols and labels
7023 when they are not dynamic symbols. */
7024 if (TARGET_64BIT)
7025 {
7026 rtx op0 = disp, op1;
7027
7028 switch (GET_CODE (disp))
7029 {
7030 case LABEL_REF:
7031 return true;
7032
7033 case CONST:
7034 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7035 break;
7036 op0 = XEXP (XEXP (disp, 0), 0);
7037 op1 = XEXP (XEXP (disp, 0), 1);
7038 if (!CONST_INT_P (op1)
7039 || INTVAL (op1) >= 16*1024*1024
7040 || INTVAL (op1) < -16*1024*1024)
7041 break;
7042 if (GET_CODE (op0) == LABEL_REF)
7043 return true;
7044 if (GET_CODE (op0) != SYMBOL_REF)
7045 break;
7046 /* FALLTHRU */
7047
7048 case SYMBOL_REF:
7049 /* TLS references should always be enclosed in UNSPEC. */
7050 if (SYMBOL_REF_TLS_MODEL (op0))
7051 return false;
7052 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7053 && ix86_cmodel != CM_LARGE_PIC)
7054 return true;
7055 break;
7056
7057 default:
7058 break;
7059 }
7060 }
7061 if (GET_CODE (disp) != CONST)
7062 return 0;
7063 disp = XEXP (disp, 0);
7064
7065 if (TARGET_64BIT)
7066 {
7067 /* It is unsafe to allow PLUS expressions. This limits the allowed
7068 distance of GOT tables. We should not need these anyway. */
7069 if (GET_CODE (disp) != UNSPEC
7070 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7071 && XINT (disp, 1) != UNSPEC_GOTOFF
7072 && XINT (disp, 1) != UNSPEC_PLTOFF))
7073 return 0;
7074
7075 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7076 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7077 return 0;
7078 return 1;
7079 }
7080
7081 saw_plus = false;
7082 if (GET_CODE (disp) == PLUS)
7083 {
7084 if (!CONST_INT_P (XEXP (disp, 1)))
7085 return 0;
7086 disp = XEXP (disp, 0);
7087 saw_plus = true;
7088 }
7089
7090 if (TARGET_MACHO && darwin_local_data_pic (disp))
7091 return 1;
7092
7093 if (GET_CODE (disp) != UNSPEC)
7094 return 0;
7095
7096 switch (XINT (disp, 1))
7097 {
7098 case UNSPEC_GOT:
7099 if (saw_plus)
7100 return false;
7101 /* We need to check for both symbols and labels because VxWorks loads
7102 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7103 details. */
7104 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7105 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7106 case UNSPEC_GOTOFF:
7107 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7108 While the ABI also specifies a 32bit relocation, we don't produce it
7109 in the small PIC model at all. */
7110 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7111 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7112 && !TARGET_64BIT)
7113 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7114 return false;
7115 case UNSPEC_GOTTPOFF:
7116 case UNSPEC_GOTNTPOFF:
7117 case UNSPEC_INDNTPOFF:
7118 if (saw_plus)
7119 return false;
7120 disp = XVECEXP (disp, 0, 0);
7121 return (GET_CODE (disp) == SYMBOL_REF
7122 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7123 case UNSPEC_NTPOFF:
7124 disp = XVECEXP (disp, 0, 0);
7125 return (GET_CODE (disp) == SYMBOL_REF
7126 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7127 case UNSPEC_DTPOFF:
7128 disp = XVECEXP (disp, 0, 0);
7129 return (GET_CODE (disp) == SYMBOL_REF
7130 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7131 }
7132
7133 return 0;
7134 }
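/* Examples of displacements accepted above (a sketch; the symbol names
   are illustrative):

     32bit PIC:
       (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
       (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)
                    (const_int 4)))
       (const (unspec [(symbol_ref "f")] UNSPEC_GOT))
     64bit PIC:
       (const (unspec [(symbol_ref "f")] UNSPEC_GOTPCREL))  */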
7135
7136 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7137 memory address for an instruction. The MODE argument is the machine mode
7138 for the MEM expression that wants to use this address.
7139
7140 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7141 convert common non-canonical forms to canonical form so that they will
7142 be recognized. */
7143
7144 int
7145 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7146 rtx addr, int strict)
7147 {
7148 struct ix86_address parts;
7149 rtx base, index, disp;
7150 HOST_WIDE_INT scale;
7151 const char *reason = NULL;
7152 rtx reason_rtx = NULL_RTX;
7153
7154 if (ix86_decompose_address (addr, &parts) <= 0)
7155 {
7156 reason = "decomposition failed";
7157 goto report_error;
7158 }
7159
7160 base = parts.base;
7161 index = parts.index;
7162 disp = parts.disp;
7163 scale = parts.scale;
7164
7165 /* Validate base register.
7166
7167 Don't allow SUBREGs that span more than a word here; they can lead to spill
7168 failures when the base is one word out of a two word structure, which is
7169 represented internally as a DImode int. */
7170
7171 if (base)
7172 {
7173 rtx reg;
7174 reason_rtx = base;
7175
7176 if (REG_P (base))
7177 reg = base;
7178 else if (GET_CODE (base) == SUBREG
7179 && REG_P (SUBREG_REG (base))
7180 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7181 <= UNITS_PER_WORD)
7182 reg = SUBREG_REG (base);
7183 else
7184 {
7185 reason = "base is not a register";
7186 goto report_error;
7187 }
7188
7189 if (GET_MODE (base) != Pmode)
7190 {
7191 reason = "base is not in Pmode";
7192 goto report_error;
7193 }
7194
7195 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7196 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7197 {
7198 reason = "base is not valid";
7199 goto report_error;
7200 }
7201 }
7202
7203 /* Validate index register.
7204
7205 Don't allow SUBREGs that span more than a word here -- same as above. */
7206
7207 if (index)
7208 {
7209 rtx reg;
7210 reason_rtx = index;
7211
7212 if (REG_P (index))
7213 reg = index;
7214 else if (GET_CODE (index) == SUBREG
7215 && REG_P (SUBREG_REG (index))
7216 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7217 <= UNITS_PER_WORD)
7218 reg = SUBREG_REG (index);
7219 else
7220 {
7221 reason = "index is not a register";
7222 goto report_error;
7223 }
7224
7225 if (GET_MODE (index) != Pmode)
7226 {
7227 reason = "index is not in Pmode";
7228 goto report_error;
7229 }
7230
7231 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7232 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7233 {
7234 reason = "index is not valid";
7235 goto report_error;
7236 }
7237 }
7238
7239 /* Validate scale factor. */
7240 if (scale != 1)
7241 {
7242 reason_rtx = GEN_INT (scale);
7243 if (!index)
7244 {
7245 reason = "scale without index";
7246 goto report_error;
7247 }
7248
7249 if (scale != 2 && scale != 4 && scale != 8)
7250 {
7251 reason = "scale is not a valid multiplier";
7252 goto report_error;
7253 }
7254 }
7255
7256 /* Validate displacement. */
7257 if (disp)
7258 {
7259 reason_rtx = disp;
7260
7261 if (GET_CODE (disp) == CONST
7262 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7263 switch (XINT (XEXP (disp, 0), 1))
7264 {
7265 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7266 used. While the ABI also specifies 32bit relocations, we don't produce
7267 them at all and use IP relative instead. */
7268 case UNSPEC_GOT:
7269 case UNSPEC_GOTOFF:
7270 gcc_assert (flag_pic);
7271 if (!TARGET_64BIT)
7272 goto is_legitimate_pic;
7273 reason = "64bit address unspec";
7274 goto report_error;
7275
7276 case UNSPEC_GOTPCREL:
7277 gcc_assert (flag_pic);
7278 goto is_legitimate_pic;
7279
7280 case UNSPEC_GOTTPOFF:
7281 case UNSPEC_GOTNTPOFF:
7282 case UNSPEC_INDNTPOFF:
7283 case UNSPEC_NTPOFF:
7284 case UNSPEC_DTPOFF:
7285 break;
7286
7287 default:
7288 reason = "invalid address unspec";
7289 goto report_error;
7290 }
7291
7292 else if (SYMBOLIC_CONST (disp)
7293 && (flag_pic
7294 || (TARGET_MACHO
7295 #if TARGET_MACHO
7296 && MACHOPIC_INDIRECT
7297 && !machopic_operand_p (disp)
7298 #endif
7299 )))
7300 {
7301
7302 is_legitimate_pic:
7303 if (TARGET_64BIT && (index || base))
7304 {
7305 /* foo@dtpoff(%rX) is ok. */
7306 if (GET_CODE (disp) != CONST
7307 || GET_CODE (XEXP (disp, 0)) != PLUS
7308 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7309 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7310 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7311 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7312 {
7313 reason = "non-constant pic memory reference";
7314 goto report_error;
7315 }
7316 }
7317 else if (! legitimate_pic_address_disp_p (disp))
7318 {
7319 reason = "displacement is an invalid pic construct";
7320 goto report_error;
7321 }
7322
7323 /* This code used to verify that a symbolic pic displacement
7324 includes the pic_offset_table_rtx register.
7325
7326 While this is a good idea, unfortunately these constructs may
7327 be created by the "adds using lea" optimization for incorrect
7328 code like:
7329
7330 int a;
7331 int foo(int i)
7332 {
7333 return *(&a+i);
7334 }
7335
7336 This code is nonsensical, but results in addressing the
7337 GOT table with the pic_offset_table_rtx base. We can't
7338 easily refuse it, since it gets matched by the
7339 "addsi3" pattern, which later gets split to lea when
7340 the output register differs from the input. While this
7341 could be handled by a separate addsi pattern for this case
7342 that never results in lea, disabling this test seems to be
7343 the easier and correct fix for the crash. */
7344 }
7345 else if (GET_CODE (disp) != LABEL_REF
7346 && !CONST_INT_P (disp)
7347 && (GET_CODE (disp) != CONST
7348 || !legitimate_constant_p (disp))
7349 && (GET_CODE (disp) != SYMBOL_REF
7350 || !legitimate_constant_p (disp)))
7351 {
7352 reason = "displacement is not constant";
7353 goto report_error;
7354 }
7355 else if (TARGET_64BIT
7356 && !x86_64_immediate_operand (disp, VOIDmode))
7357 {
7358 reason = "displacement is out of range";
7359 goto report_error;
7360 }
7361 }
7362
7363 /* Everything looks valid. */
7364 return TRUE;
7365
7366 report_error:
7367 return FALSE;
7368 }
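/* A few concrete addresses and how the checks above treat them (a
   sketch; the register choices are arbitrary):

     (plus (reg:SI bx) (mult (reg:SI si) (const_int 4)))   accepted
     (mult (reg:SI si) (const_int 3))                       rejected: scale 3
     (symbol_ref "x") with flag_pic                         rejected: must be
                                                            a GOT/GOTOFF unspec  */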
7369 \f
7370 /* Return a unique alias set for the GOT. */
7371
7372 static alias_set_type
7373 ix86_GOT_alias_set (void)
7374 {
7375 static alias_set_type set = -1;
7376 if (set == -1)
7377 set = new_alias_set ();
7378 return set;
7379 }
7380
7381 /* Return a legitimate reference for ORIG (an address) using the
7382 register REG. If REG is 0, a new pseudo is generated.
7383
7384 There are two types of references that must be handled:
7385
7386 1. Global data references must load the address from the GOT, via
7387 the PIC reg. An insn is emitted to do this load, and the reg is
7388 returned.
7389
7390 2. Static data references, constant pool addresses, and code labels
7391 compute the address as an offset from the GOT, whose base is in
7392 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7393 differentiate them from global data objects. The returned
7394 address is the PIC reg + an unspec constant.
7395
7396 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7397 reg also appears in the address. */
7398
7399 static rtx
7400 legitimize_pic_address (rtx orig, rtx reg)
7401 {
7402 rtx addr = orig;
7403 rtx new_rtx = orig;
7404 rtx base;
7405
7406 #if TARGET_MACHO
7407 if (TARGET_MACHO && !TARGET_64BIT)
7408 {
7409 if (reg == 0)
7410 reg = gen_reg_rtx (Pmode);
7411 /* Use the generic Mach-O PIC machinery. */
7412 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7413 }
7414 #endif
7415
7416 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7417 new_rtx = addr;
7418 else if (TARGET_64BIT
7419 && ix86_cmodel != CM_SMALL_PIC
7420 && gotoff_operand (addr, Pmode))
7421 {
7422 rtx tmpreg;
7423 /* This symbol may be referenced via a displacement from the PIC
7424 base address (@GOTOFF). */
7425
7426 if (reload_in_progress)
7427 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7428 if (GET_CODE (addr) == CONST)
7429 addr = XEXP (addr, 0);
7430 if (GET_CODE (addr) == PLUS)
7431 {
7432 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7433 UNSPEC_GOTOFF);
7434 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7435 }
7436 else
7437 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7438 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7439 if (!reg)
7440 tmpreg = gen_reg_rtx (Pmode);
7441 else
7442 tmpreg = reg;
7443 emit_move_insn (tmpreg, new_rtx);
7444
7445 if (reg != 0)
7446 {
7447 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7448 tmpreg, 1, OPTAB_DIRECT);
7449 new_rtx = reg;
7450 }
7451 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7452 }
7453 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7454 {
7455 /* This symbol may be referenced via a displacement from the PIC
7456 base address (@GOTOFF). */
7457
7458 if (reload_in_progress)
7459 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7460 if (GET_CODE (addr) == CONST)
7461 addr = XEXP (addr, 0);
7462 if (GET_CODE (addr) == PLUS)
7463 {
7464 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7465 UNSPEC_GOTOFF);
7466 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7467 }
7468 else
7469 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7470 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7471 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7472
7473 if (reg != 0)
7474 {
7475 emit_move_insn (reg, new_rtx);
7476 new_rtx = reg;
7477 }
7478 }
7479 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7480 /* We can't use @GOTOFF for text labels on VxWorks;
7481 see gotoff_operand. */
7482 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7483 {
7484 /* Given that we've already handled dllimport variables separately
7485 in legitimize_address, and all other variables should satisfy
7486 legitimate_pic_address_disp_p, we should never arrive here. */
7487 gcc_assert (!TARGET_64BIT_MS_ABI);
7488
7489 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7490 {
7491 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7492 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7493 new_rtx = gen_const_mem (Pmode, new_rtx);
7494 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7495
7496 if (reg == 0)
7497 reg = gen_reg_rtx (Pmode);
7498 /* Use gen_movsi directly, otherwise the address is loaded
7499 into a register for CSE. We don't want to CSE these addresses,
7500 instead we CSE addresses from the GOT table, so skip this. */
7501 emit_insn (gen_movsi (reg, new_rtx));
7502 new_rtx = reg;
7503 }
7504 else
7505 {
7506 /* This symbol must be referenced via a load from the
7507 Global Offset Table (@GOT). */
7508
7509 if (reload_in_progress)
7510 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7511 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7512 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7513 if (TARGET_64BIT)
7514 new_rtx = force_reg (Pmode, new_rtx);
7515 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7516 new_rtx = gen_const_mem (Pmode, new_rtx);
7517 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7518
7519 if (reg == 0)
7520 reg = gen_reg_rtx (Pmode);
7521 emit_move_insn (reg, new_rtx);
7522 new_rtx = reg;
7523 }
7524 }
7525 else
7526 {
7527 if (CONST_INT_P (addr)
7528 && !x86_64_immediate_operand (addr, VOIDmode))
7529 {
7530 if (reg)
7531 {
7532 emit_move_insn (reg, addr);
7533 new_rtx = reg;
7534 }
7535 else
7536 new_rtx = force_reg (Pmode, addr);
7537 }
7538 else if (GET_CODE (addr) == CONST)
7539 {
7540 addr = XEXP (addr, 0);
7541
7542 /* We must match stuff we generate before. Assume the only
7543 unspecs that can get here are ours. Not that we could do
7544 anything with them anyway.... */
7545 if (GET_CODE (addr) == UNSPEC
7546 || (GET_CODE (addr) == PLUS
7547 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7548 return orig;
7549 gcc_assert (GET_CODE (addr) == PLUS);
7550 }
7551 if (GET_CODE (addr) == PLUS)
7552 {
7553 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7554
7555 /* Check first to see if this is a constant offset from a @GOTOFF
7556 symbol reference. */
7557 if (gotoff_operand (op0, Pmode)
7558 && CONST_INT_P (op1))
7559 {
7560 if (!TARGET_64BIT)
7561 {
7562 if (reload_in_progress)
7563 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7564 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7565 UNSPEC_GOTOFF);
7566 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7567 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7568 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7569
7570 if (reg != 0)
7571 {
7572 emit_move_insn (reg, new_rtx);
7573 new_rtx = reg;
7574 }
7575 }
7576 else
7577 {
7578 if (INTVAL (op1) < -16*1024*1024
7579 || INTVAL (op1) >= 16*1024*1024)
7580 {
7581 if (!x86_64_immediate_operand (op1, Pmode))
7582 op1 = force_reg (Pmode, op1);
7583 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7584 }
7585 }
7586 }
7587 else
7588 {
7589 base = legitimize_pic_address (XEXP (addr, 0), reg);
7590 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7591 base == reg ? NULL_RTX : reg);
7592
7593 if (CONST_INT_P (new_rtx))
7594 new_rtx = plus_constant (base, INTVAL (new_rtx));
7595 else
7596 {
7597 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7598 {
7599 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7600 new_rtx = XEXP (new_rtx, 1);
7601 }
7602 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7603 }
7604 }
7605 }
7606 }
7607 return new_rtx;
7608 }
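/* Rough shape of the 32bit PIC transformations above (a sketch; "x" is
   a local variable, "f" a non-local symbol):

     (symbol_ref "x")  ->  (plus pic_offset_table_rtx
                                 (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

     (symbol_ref "f")  ->  (mem (plus pic_offset_table_rtx
                                      (const (unspec [(symbol_ref "f")] UNSPEC_GOT))))

   i.e. local data becomes a @GOTOFF displacement from the PIC register,
   while other symbols are loaded from their GOT slot.  */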
7609 \f
7610 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7611
7612 static rtx
7613 get_thread_pointer (int to_reg)
7614 {
7615 rtx tp, reg, insn;
7616
7617 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7618 if (!to_reg)
7619 return tp;
7620
7621 reg = gen_reg_rtx (Pmode);
7622 insn = gen_rtx_SET (VOIDmode, reg, tp);
7623 insn = emit_insn (insn);
7624
7625 return reg;
7626 }
7627
7628 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7629 false if we expect this to be used for a memory address and true if
7630 we expect to load the address into a register. */
7631
7632 static rtx
7633 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7634 {
7635 rtx dest, base, off, pic, tp;
7636 int type;
7637
7638 switch (model)
7639 {
7640 case TLS_MODEL_GLOBAL_DYNAMIC:
7641 dest = gen_reg_rtx (Pmode);
7642 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7643
7644 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7645 {
7646 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7647
7648 start_sequence ();
7649 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7650 insns = get_insns ();
7651 end_sequence ();
7652
7653 CONST_OR_PURE_CALL_P (insns) = 1;
7654 emit_libcall_block (insns, dest, rax, x);
7655 }
7656 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7657 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7658 else
7659 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7660
7661 if (TARGET_GNU2_TLS)
7662 {
7663 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7664
7665 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7666 }
7667 break;
7668
7669 case TLS_MODEL_LOCAL_DYNAMIC:
7670 base = gen_reg_rtx (Pmode);
7671 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7672
7673 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7674 {
7675 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7676
7677 start_sequence ();
7678 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7679 insns = get_insns ();
7680 end_sequence ();
7681
7682 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7683 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7684 CONST_OR_PURE_CALL_P (insns) = 1;
7685 emit_libcall_block (insns, base, rax, note);
7686 }
7687 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7688 emit_insn (gen_tls_local_dynamic_base_64 (base));
7689 else
7690 emit_insn (gen_tls_local_dynamic_base_32 (base));
7691
7692 if (TARGET_GNU2_TLS)
7693 {
7694 rtx x = ix86_tls_module_base ();
7695
7696 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7697 gen_rtx_MINUS (Pmode, x, tp));
7698 }
7699
7700 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7701 off = gen_rtx_CONST (Pmode, off);
7702
7703 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7704
7705 if (TARGET_GNU2_TLS)
7706 {
7707 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7708
7709 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7710 }
7711
7712 break;
7713
7714 case TLS_MODEL_INITIAL_EXEC:
7715 if (TARGET_64BIT)
7716 {
7717 pic = NULL;
7718 type = UNSPEC_GOTNTPOFF;
7719 }
7720 else if (flag_pic)
7721 {
7722 if (reload_in_progress)
7723 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7724 pic = pic_offset_table_rtx;
7725 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7726 }
7727 else if (!TARGET_ANY_GNU_TLS)
7728 {
7729 pic = gen_reg_rtx (Pmode);
7730 emit_insn (gen_set_got (pic));
7731 type = UNSPEC_GOTTPOFF;
7732 }
7733 else
7734 {
7735 pic = NULL;
7736 type = UNSPEC_INDNTPOFF;
7737 }
7738
7739 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7740 off = gen_rtx_CONST (Pmode, off);
7741 if (pic)
7742 off = gen_rtx_PLUS (Pmode, pic, off);
7743 off = gen_const_mem (Pmode, off);
7744 set_mem_alias_set (off, ix86_GOT_alias_set ());
7745
7746 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7747 {
7748 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7749 off = force_reg (Pmode, off);
7750 return gen_rtx_PLUS (Pmode, base, off);
7751 }
7752 else
7753 {
7754 base = get_thread_pointer (true);
7755 dest = gen_reg_rtx (Pmode);
7756 emit_insn (gen_subsi3 (dest, base, off));
7757 }
7758 break;
7759
7760 case TLS_MODEL_LOCAL_EXEC:
7761 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7762 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7763 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7764 off = gen_rtx_CONST (Pmode, off);
7765
7766 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7767 {
7768 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7769 return gen_rtx_PLUS (Pmode, base, off);
7770 }
7771 else
7772 {
7773 base = get_thread_pointer (true);
7774 dest = gen_reg_rtx (Pmode);
7775 emit_insn (gen_subsi3 (dest, base, off));
7776 }
7777 break;
7778
7779 default:
7780 gcc_unreachable ();
7781 }
7782
7783 return dest;
7784 }
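/* Rough shape of the addresses produced above for the exec TLS models
   with GNU TLS (a sketch; "x" names a thread-local variable):

     local exec:
       (plus (reg tp) (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))

     initial exec, 32bit PIC:
       thread pointer plus a value loaded from
       (mem (plus pic_offset_table_rtx
                  (const (unspec [(symbol_ref "x")] UNSPEC_GOTNTPOFF))))

   The dynamic models instead go through the gen_tls_*_dynamic_*
   call patterns emitted above.  */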
7785
7786 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7787 to symbol DECL. */
7788
7789 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7790 htab_t dllimport_map;
7791
7792 static tree
7793 get_dllimport_decl (tree decl)
7794 {
7795 struct tree_map *h, in;
7796 void **loc;
7797 const char *name;
7798 const char *prefix;
7799 size_t namelen, prefixlen;
7800 char *imp_name;
7801 tree to;
7802 rtx rtl;
7803
7804 if (!dllimport_map)
7805 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7806
7807 in.hash = htab_hash_pointer (decl);
7808 in.base.from = decl;
7809 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7810 h = (struct tree_map *) *loc;
7811 if (h)
7812 return h->to;
7813
7814 *loc = h = GGC_NEW (struct tree_map);
7815 h->hash = in.hash;
7816 h->base.from = decl;
7817 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7818 DECL_ARTIFICIAL (to) = 1;
7819 DECL_IGNORED_P (to) = 1;
7820 DECL_EXTERNAL (to) = 1;
7821 TREE_READONLY (to) = 1;
7822
7823 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7824 name = targetm.strip_name_encoding (name);
7825 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7826 namelen = strlen (name);
7827 prefixlen = strlen (prefix);
7828 imp_name = (char *) alloca (namelen + prefixlen + 1);
7829 memcpy (imp_name, prefix, prefixlen);
7830 memcpy (imp_name + prefixlen, name, namelen + 1);
7831
7832 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7833 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7834 SET_SYMBOL_REF_DECL (rtl, to);
7835 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7836
7837 rtl = gen_const_mem (Pmode, rtl);
7838 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7839
7840 SET_DECL_RTL (to, rtl);
7841
7842 return to;
7843 }
7844
7845 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7846 true if we require the result be a register. */
7847
7848 static rtx
7849 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7850 {
7851 tree imp_decl;
7852 rtx x;
7853
7854 gcc_assert (SYMBOL_REF_DECL (symbol));
7855 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7856
7857 x = DECL_RTL (imp_decl);
7858 if (want_reg)
7859 x = force_reg (Pmode, x);
7860 return x;
7861 }
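/* Example (a sketch; "foo" is illustrative): with dllimport handling,
   a reference to (symbol_ref "foo") becomes a load from the import
   slot built by get_dllimport_decl, roughly

     (mem (symbol_ref "*__imp__foo"))

   ("*__imp_foo" for FASTCALL_PREFIX names), optionally forced into a
   register when WANT_REG is true.  */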
7862
7863 /* Try machine-dependent ways of modifying an illegitimate address
7864 to be legitimate. If we find one, return the new, valid address.
7865 This macro is used in only one place: `memory_address' in explow.c.
7866
7867 OLDX is the address as it was before break_out_memory_refs was called.
7868 In some cases it is useful to look at this to decide what needs to be done.
7869
7870 MODE and WIN are passed so that this macro can use
7871 GO_IF_LEGITIMATE_ADDRESS.
7872
7873 It is always safe for this macro to do nothing. It exists to recognize
7874 opportunities to optimize the output.
7875
7876 For the 80386, we handle X+REG by loading X into a register R and
7877 using R+REG. R will go in a general reg and indexing will be used.
7878 However, if REG is a broken-out memory address or multiplication,
7879 nothing needs to be done because REG can certainly go in a general reg.
7880
7881 When -fpic is used, special handling is needed for symbolic references.
7882 See comments by legitimize_pic_address in i386.c for details. */
7883
7884 rtx
7885 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7886 {
7887 int changed = 0;
7888 unsigned log;
7889
7890 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7891 if (log)
7892 return legitimize_tls_address (x, (enum tls_model) log, false);
7893 if (GET_CODE (x) == CONST
7894 && GET_CODE (XEXP (x, 0)) == PLUS
7895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7896 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7897 {
7898 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7899 (enum tls_model) log, false);
7900 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7901 }
7902
7903 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7904 {
7905 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7906 return legitimize_dllimport_symbol (x, true);
7907 if (GET_CODE (x) == CONST
7908 && GET_CODE (XEXP (x, 0)) == PLUS
7909 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7910 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7911 {
7912 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7913 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7914 }
7915 }
7916
7917 if (flag_pic && SYMBOLIC_CONST (x))
7918 return legitimize_pic_address (x, 0);
7919
7920 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7921 if (GET_CODE (x) == ASHIFT
7922 && CONST_INT_P (XEXP (x, 1))
7923 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7924 {
7925 changed = 1;
7926 log = INTVAL (XEXP (x, 1));
7927 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7928 GEN_INT (1 << log));
7929 }
7930
7931 if (GET_CODE (x) == PLUS)
7932 {
7933 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7934
7935 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7936 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7937 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7938 {
7939 changed = 1;
7940 log = INTVAL (XEXP (XEXP (x, 0), 1));
7941 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7942 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7943 GEN_INT (1 << log));
7944 }
7945
7946 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7947 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7948 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7949 {
7950 changed = 1;
7951 log = INTVAL (XEXP (XEXP (x, 1), 1));
7952 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7953 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7954 GEN_INT (1 << log));
7955 }
7956
7957 /* Put multiply first if it isn't already. */
7958 if (GET_CODE (XEXP (x, 1)) == MULT)
7959 {
7960 rtx tmp = XEXP (x, 0);
7961 XEXP (x, 0) = XEXP (x, 1);
7962 XEXP (x, 1) = tmp;
7963 changed = 1;
7964 }
7965
7966 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7967 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7968 created by virtual register instantiation, register elimination, and
7969 similar optimizations. */
7970 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7971 {
7972 changed = 1;
7973 x = gen_rtx_PLUS (Pmode,
7974 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7975 XEXP (XEXP (x, 1), 0)),
7976 XEXP (XEXP (x, 1), 1));
7977 }
7978
7979 /* Canonicalize
7980 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7981 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7982 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7983 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7984 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7985 && CONSTANT_P (XEXP (x, 1)))
7986 {
7987 rtx constant;
7988 rtx other = NULL_RTX;
7989
7990 if (CONST_INT_P (XEXP (x, 1)))
7991 {
7992 constant = XEXP (x, 1);
7993 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7994 }
7995 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7996 {
7997 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7998 other = XEXP (x, 1);
7999 }
8000 else
8001 constant = 0;
8002
8003 if (constant)
8004 {
8005 changed = 1;
8006 x = gen_rtx_PLUS (Pmode,
8007 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8008 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8009 plus_constant (other, INTVAL (constant)));
8010 }
8011 }
8012
8013 if (changed && legitimate_address_p (mode, x, FALSE))
8014 return x;
8015
8016 if (GET_CODE (XEXP (x, 0)) == MULT)
8017 {
8018 changed = 1;
8019 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8020 }
8021
8022 if (GET_CODE (XEXP (x, 1)) == MULT)
8023 {
8024 changed = 1;
8025 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8026 }
8027
8028 if (changed
8029 && REG_P (XEXP (x, 1))
8030 && REG_P (XEXP (x, 0)))
8031 return x;
8032
8033 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8034 {
8035 changed = 1;
8036 x = legitimize_pic_address (x, 0);
8037 }
8038
8039 if (changed && legitimate_address_p (mode, x, FALSE))
8040 return x;
8041
8042 if (REG_P (XEXP (x, 0)))
8043 {
8044 rtx temp = gen_reg_rtx (Pmode);
8045 rtx val = force_operand (XEXP (x, 1), temp);
8046 if (val != temp)
8047 emit_move_insn (temp, val);
8048
8049 XEXP (x, 1) = temp;
8050 return x;
8051 }
8052
8053 else if (REG_P (XEXP (x, 1)))
8054 {
8055 rtx temp = gen_reg_rtx (Pmode);
8056 rtx val = force_operand (XEXP (x, 0), temp);
8057 if (val != temp)
8058 emit_move_insn (temp, val);
8059
8060 XEXP (x, 0) = temp;
8061 return x;
8062 }
8063 }
8064
8065 return x;
8066 }
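/* Examples of the canonicalizations performed above (a sketch; the
   registers are illustrative):

     (ashift (reg) (const_int 2))
       ->  (mult (reg) (const_int 4))

     (plus (reg) (mult (reg) (const_int 4)))
       ->  (plus (mult (reg) (const_int 4)) (reg))          multiply first

     (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
       ->  (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))  */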
8067 \f
8068 /* Print an integer constant expression in assembler syntax. Addition
8069 and subtraction are the only arithmetic that may appear in these
8070 expressions. FILE is the stdio stream to write to, X is the rtx, and
8071 CODE is the operand print code from the output string. */
8072
8073 static void
8074 output_pic_addr_const (FILE *file, rtx x, int code)
8075 {
8076 char buf[256];
8077
8078 switch (GET_CODE (x))
8079 {
8080 case PC:
8081 gcc_assert (flag_pic);
8082 putc ('.', file);
8083 break;
8084
8085 case SYMBOL_REF:
8086 if (! TARGET_MACHO || TARGET_64BIT)
8087 output_addr_const (file, x);
8088 else
8089 {
8090 const char *name = XSTR (x, 0);
8091
8092 /* Mark the decl as referenced so that cgraph will
8093 output the function. */
8094 if (SYMBOL_REF_DECL (x))
8095 mark_decl_referenced (SYMBOL_REF_DECL (x));
8096
8097 #if TARGET_MACHO
8098 if (MACHOPIC_INDIRECT
8099 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8100 name = machopic_indirection_name (x, /*stub_p=*/true);
8101 #endif
8102 assemble_name (file, name);
8103 }
8104 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8105 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8106 fputs ("@PLT", file);
8107 break;
8108
8109 case LABEL_REF:
8110 x = XEXP (x, 0);
8111 /* FALLTHRU */
8112 case CODE_LABEL:
8113 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8114 assemble_name (asm_out_file, buf);
8115 break;
8116
8117 case CONST_INT:
8118 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8119 break;
8120
8121 case CONST:
8122 /* This used to output parentheses around the expression,
8123 but that does not work on the 386 (either ATT or BSD assembler). */
8124 output_pic_addr_const (file, XEXP (x, 0), code);
8125 break;
8126
8127 case CONST_DOUBLE:
8128 if (GET_MODE (x) == VOIDmode)
8129 {
8130 /* We can use %d if the number is <32 bits and positive. */
8131 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8132 fprintf (file, "0x%lx%08lx",
8133 (unsigned long) CONST_DOUBLE_HIGH (x),
8134 (unsigned long) CONST_DOUBLE_LOW (x));
8135 else
8136 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8137 }
8138 else
8139 /* We can't handle floating point constants;
8140 PRINT_OPERAND must handle them. */
8141 output_operand_lossage ("floating constant misused");
8142 break;
8143
8144 case PLUS:
8145 /* Some assemblers need integer constants to appear first. */
8146 if (CONST_INT_P (XEXP (x, 0)))
8147 {
8148 output_pic_addr_const (file, XEXP (x, 0), code);
8149 putc ('+', file);
8150 output_pic_addr_const (file, XEXP (x, 1), code);
8151 }
8152 else
8153 {
8154 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8155 output_pic_addr_const (file, XEXP (x, 1), code);
8156 putc ('+', file);
8157 output_pic_addr_const (file, XEXP (x, 0), code);
8158 }
8159 break;
8160
8161 case MINUS:
8162 if (!TARGET_MACHO)
8163 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8164 output_pic_addr_const (file, XEXP (x, 0), code);
8165 putc ('-', file);
8166 output_pic_addr_const (file, XEXP (x, 1), code);
8167 if (!TARGET_MACHO)
8168 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8169 break;
8170
8171 case UNSPEC:
8172 gcc_assert (XVECLEN (x, 0) == 1);
8173 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8174 switch (XINT (x, 1))
8175 {
8176 case UNSPEC_GOT:
8177 fputs ("@GOT", file);
8178 break;
8179 case UNSPEC_GOTOFF:
8180 fputs ("@GOTOFF", file);
8181 break;
8182 case UNSPEC_PLTOFF:
8183 fputs ("@PLTOFF", file);
8184 break;
8185 case UNSPEC_GOTPCREL:
8186 fputs ("@GOTPCREL(%rip)", file);
8187 break;
8188 case UNSPEC_GOTTPOFF:
8189 /* FIXME: This might be @TPOFF in Sun ld too. */
8190 fputs ("@GOTTPOFF", file);
8191 break;
8192 case UNSPEC_TPOFF:
8193 fputs ("@TPOFF", file);
8194 break;
8195 case UNSPEC_NTPOFF:
8196 if (TARGET_64BIT)
8197 fputs ("@TPOFF", file);
8198 else
8199 fputs ("@NTPOFF", file);
8200 break;
8201 case UNSPEC_DTPOFF:
8202 fputs ("@DTPOFF", file);
8203 break;
8204 case UNSPEC_GOTNTPOFF:
8205 if (TARGET_64BIT)
8206 fputs ("@GOTTPOFF(%rip)", file);
8207 else
8208 fputs ("@GOTNTPOFF", file);
8209 break;
8210 case UNSPEC_INDNTPOFF:
8211 fputs ("@INDNTPOFF", file);
8212 break;
8213 default:
8214 output_operand_lossage ("invalid UNSPEC as operand");
8215 break;
8216 }
8217 break;
8218
8219 default:
8220 output_operand_lossage ("invalid expression as operand");
8221 }
8222 }
8223
8224 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8225 We need to emit DTP-relative relocations. */
8226
8227 static void ATTRIBUTE_UNUSED
8228 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8229 {
8230 fputs (ASM_LONG, file);
8231 output_addr_const (file, x);
8232 fputs ("@DTPOFF", file);
8233 switch (size)
8234 {
8235 case 4:
8236 break;
8237 case 8:
8238 fputs (", 0", file);
8239 break;
8240 default:
8241 gcc_unreachable ();
8242 }
8243 }
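/* Example output of i386_output_dwarf_dtprel (a sketch; "x" is a TLS
   variable and ASM_LONG is assumed to expand to "\t.long\t"):

     size 4:   .long   x@DTPOFF
     size 8:   .long   x@DTPOFF, 0  */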
8244
8245 /* In the name of slightly smaller debug output, and to cater to
8246 general assembler lossage, recognize PIC+GOTOFF and turn it back
8247 into a direct symbol reference.
8248
8249 On Darwin, this is necessary to avoid a crash, because Darwin
8250 has a different PIC label for each routine but the DWARF debugging
8251 information is not associated with any particular routine, so it's
8252 necessary to remove references to the PIC label from RTL stored by
8253 the DWARF output code. */
8254
8255 static rtx
8256 ix86_delegitimize_address (rtx orig_x)
8257 {
8258 rtx x = orig_x;
8259 /* reg_addend is NULL or a multiple of some register. */
8260 rtx reg_addend = NULL_RTX;
8261 /* const_addend is NULL or a const_int. */
8262 rtx const_addend = NULL_RTX;
8263 /* This is the result, or NULL. */
8264 rtx result = NULL_RTX;
8265
8266 if (MEM_P (x))
8267 x = XEXP (x, 0);
8268
8269 if (TARGET_64BIT)
8270 {
8271 if (GET_CODE (x) != CONST
8272 || GET_CODE (XEXP (x, 0)) != UNSPEC
8273 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8274 || !MEM_P (orig_x))
8275 return orig_x;
8276 return XVECEXP (XEXP (x, 0), 0, 0);
8277 }
8278
8279 if (GET_CODE (x) != PLUS
8280 || GET_CODE (XEXP (x, 1)) != CONST)
8281 return orig_x;
8282
8283 if (REG_P (XEXP (x, 0))
8284 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8285 /* %ebx + GOT/GOTOFF */
8286 ;
8287 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8288 {
8289 /* %ebx + %reg * scale + GOT/GOTOFF */
8290 reg_addend = XEXP (x, 0);
8291 if (REG_P (XEXP (reg_addend, 0))
8292 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8293 reg_addend = XEXP (reg_addend, 1);
8294 else if (REG_P (XEXP (reg_addend, 1))
8295 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8296 reg_addend = XEXP (reg_addend, 0);
8297 else
8298 return orig_x;
8299 if (!REG_P (reg_addend)
8300 && GET_CODE (reg_addend) != MULT
8301 && GET_CODE (reg_addend) != ASHIFT)
8302 return orig_x;
8303 }
8304 else
8305 return orig_x;
8306
8307 x = XEXP (XEXP (x, 1), 0);
8308 if (GET_CODE (x) == PLUS
8309 && CONST_INT_P (XEXP (x, 1)))
8310 {
8311 const_addend = XEXP (x, 1);
8312 x = XEXP (x, 0);
8313 }
8314
8315 if (GET_CODE (x) == UNSPEC
8316 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8317 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8318 result = XVECEXP (x, 0, 0);
8319
8320 if (TARGET_MACHO && darwin_local_data_pic (x)
8321 && !MEM_P (orig_x))
8322 result = XEXP (x, 0);
8323
8324 if (! result)
8325 return orig_x;
8326
8327 if (const_addend)
8328 result = gen_rtx_PLUS (Pmode, result, const_addend);
8329 if (reg_addend)
8330 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8331 return result;
8332 }
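/* Example of the delegitimization above for 32bit PIC code (a sketch;
   "x" is illustrative):

     (mem (plus (reg:SI ebx)
                (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))
       ->  (symbol_ref "x")

   i.e. the PIC register and the GOT/GOTOFF wrapper are stripped so that
   debug output can refer to the symbol directly.  */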
8333
8334 /* If X is a machine specific address (i.e. a symbol or label being
8335 referenced as a displacement from the GOT implemented using an
8336 UNSPEC), then return the base term. Otherwise return X. */
8337
8338 rtx
8339 ix86_find_base_term (rtx x)
8340 {
8341 rtx term;
8342
8343 if (TARGET_64BIT)
8344 {
8345 if (GET_CODE (x) != CONST)
8346 return x;
8347 term = XEXP (x, 0);
8348 if (GET_CODE (term) == PLUS
8349 && (CONST_INT_P (XEXP (term, 1))
8350 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8351 term = XEXP (term, 0);
8352 if (GET_CODE (term) != UNSPEC
8353 || XINT (term, 1) != UNSPEC_GOTPCREL)
8354 return x;
8355
8356 term = XVECEXP (term, 0, 0);
8357
8358 if (GET_CODE (term) != SYMBOL_REF
8359 && GET_CODE (term) != LABEL_REF)
8360 return x;
8361
8362 return term;
8363 }
8364
8365 term = ix86_delegitimize_address (x);
8366
8367 if (GET_CODE (term) != SYMBOL_REF
8368 && GET_CODE (term) != LABEL_REF)
8369 return x;
8370
8371 return term;
8372 }
8373 \f
8374 static void
8375 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8376 int fp, FILE *file)
8377 {
8378 const char *suffix;
8379
8380 if (mode == CCFPmode || mode == CCFPUmode)
8381 {
8382 enum rtx_code second_code, bypass_code;
8383 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8384 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8385 code = ix86_fp_compare_code_to_integer (code);
8386 mode = CCmode;
8387 }
8388 if (reverse)
8389 code = reverse_condition (code);
8390
8391 switch (code)
8392 {
8393 case EQ:
8394 switch (mode)
8395 {
8396 case CCAmode:
8397 suffix = "a";
8398 break;
8399
8400 case CCCmode:
8401 suffix = "c";
8402 break;
8403
8404 case CCOmode:
8405 suffix = "o";
8406 break;
8407
8408 case CCSmode:
8409 suffix = "s";
8410 break;
8411
8412 default:
8413 suffix = "e";
8414 }
8415 break;
8416 case NE:
8417 switch (mode)
8418 {
8419 case CCAmode:
8420 suffix = "na";
8421 break;
8422
8423 case CCCmode:
8424 suffix = "nc";
8425 break;
8426
8427 case CCOmode:
8428 suffix = "no";
8429 break;
8430
8431 case CCSmode:
8432 suffix = "ns";
8433 break;
8434
8435 default:
8436 suffix = "ne";
8437 }
8438 break;
8439 case GT:
8440 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8441 suffix = "g";
8442 break;
8443 case GTU:
8444 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8445 Those same assemblers have the same but opposite lossage on cmov. */
8446 if (mode == CCmode)
8447 suffix = fp ? "nbe" : "a";
8448 else if (mode == CCCmode)
8449 suffix = "b";
8450 else
8451 gcc_unreachable ();
8452 break;
8453 case LT:
8454 switch (mode)
8455 {
8456 case CCNOmode:
8457 case CCGOCmode:
8458 suffix = "s";
8459 break;
8460
8461 case CCmode:
8462 case CCGCmode:
8463 suffix = "l";
8464 break;
8465
8466 default:
8467 gcc_unreachable ();
8468 }
8469 break;
8470 case LTU:
8471 gcc_assert (mode == CCmode || mode == CCCmode);
8472 suffix = "b";
8473 break;
8474 case GE:
8475 switch (mode)
8476 {
8477 case CCNOmode:
8478 case CCGOCmode:
8479 suffix = "ns";
8480 break;
8481
8482 case CCmode:
8483 case CCGCmode:
8484 suffix = "ge";
8485 break;
8486
8487 default:
8488 gcc_unreachable ();
8489 }
8490 break;
8491 case GEU:
8492 /* ??? As above. */
8493 gcc_assert (mode == CCmode || mode == CCCmode);
8494 suffix = fp ? "nb" : "ae";
8495 break;
8496 case LE:
8497 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8498 suffix = "le";
8499 break;
8500 case LEU:
8501 /* ??? As above. */
8502 if (mode == CCmode)
8503 suffix = "be";
8504 else if (mode == CCCmode)
8505 suffix = fp ? "nb" : "ae";
8506 else
8507 gcc_unreachable ();
8508 break;
8509 case UNORDERED:
8510 suffix = fp ? "u" : "p";
8511 break;
8512 case ORDERED:
8513 suffix = fp ? "nu" : "np";
8514 break;
8515 default:
8516 gcc_unreachable ();
8517 }
8518 fputs (suffix, file);
8519 }
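/* Examples of the suffixes chosen above (a sketch):

     EQ  in CCmode     -> "e"    e.g. sete, je, cmove
     GTU in CCmode     -> "a"    ("nbe" for fcmov, see above)
     LT  in CCGOCmode  -> "s"    sign flag only
     GEU in CCmode     -> "ae"  */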
8520
8521 /* Print the name of register X to FILE based on its machine mode and number.
8522 If CODE is 'w', pretend the mode is HImode.
8523 If CODE is 'b', pretend the mode is QImode.
8524 If CODE is 'k', pretend the mode is SImode.
8525 If CODE is 'q', pretend the mode is DImode.
8526 If CODE is 'h', pretend the reg is the 'high' byte register.
8527 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
8528
8529 void
8530 print_reg (rtx x, int code, FILE *file)
8531 {
8532 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8533 && REGNO (x) != FRAME_POINTER_REGNUM
8534 && REGNO (x) != FLAGS_REG
8535 && REGNO (x) != FPSR_REG
8536 && REGNO (x) != FPCR_REG);
8537
8538 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8539 putc ('%', file);
8540
8541 if (code == 'w' || MMX_REG_P (x))
8542 code = 2;
8543 else if (code == 'b')
8544 code = 1;
8545 else if (code == 'k')
8546 code = 4;
8547 else if (code == 'q')
8548 code = 8;
8549 else if (code == 'y')
8550 code = 3;
8551 else if (code == 'h')
8552 code = 0;
8553 else
8554 code = GET_MODE_SIZE (GET_MODE (x));
8555
8556 /* Irritatingly, AMD extended registers use a different naming convention
8557 from the normal registers. */
8558 if (REX_INT_REG_P (x))
8559 {
8560 gcc_assert (TARGET_64BIT);
8561 switch (code)
8562 {
8563 case 0:
8564 error ("extended registers have no high halves");
8565 break;
8566 case 1:
8567 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8568 break;
8569 case 2:
8570 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8571 break;
8572 case 4:
8573 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8574 break;
8575 case 8:
8576 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8577 break;
8578 default:
8579 error ("unsupported operand size for extended register");
8580 break;
8581 }
8582 return;
8583 }
8584 switch (code)
8585 {
8586 case 3:
8587 if (STACK_TOP_P (x))
8588 {
8589 fputs ("st(0)", file);
8590 break;
8591 }
8592 /* FALLTHRU */
8593 case 8:
8594 case 4:
8595 case 12:
8596 if (! ANY_FP_REG_P (x))
8597 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8598 /* FALLTHRU */
8599 case 16:
8600 case 2:
8601 normal:
8602 fputs (hi_reg_name[REGNO (x)], file);
8603 break;
8604 case 1:
8605 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8606 goto normal;
8607 fputs (qi_reg_name[REGNO (x)], file);
8608 break;
8609 case 0:
8610 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8611 goto normal;
8612 fputs (qi_high_reg_name[REGNO (x)], file);
8613 break;
8614 default:
8615 gcc_unreachable ();
8616 }
8617 }
8618
8619 /* Locate some local-dynamic symbol still in use by this function
8620 so that we can print its name in some tls_local_dynamic_base
8621 pattern. */
8622
8623 static int
8624 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8625 {
8626 rtx x = *px;
8627
8628 if (GET_CODE (x) == SYMBOL_REF
8629 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8630 {
8631 cfun->machine->some_ld_name = XSTR (x, 0);
8632 return 1;
8633 }
8634
8635 return 0;
8636 }
8637
8638 static const char *
8639 get_some_local_dynamic_name (void)
8640 {
8641 rtx insn;
8642
8643 if (cfun->machine->some_ld_name)
8644 return cfun->machine->some_ld_name;
8645
8646 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8647 if (INSN_P (insn)
8648 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8649 return cfun->machine->some_ld_name;
8650
8651 gcc_unreachable ();
8652 }
8653
8654 /* Meaning of CODE:
8655 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8656 C -- print opcode suffix for set/cmov insn.
8657 c -- like C, but print reversed condition
8658 F,f -- likewise, but for floating-point.
8659 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8660 otherwise nothing
8661 R -- print the prefix for register names.
8662 z -- print the opcode suffix for the size of the current operand.
8663 * -- print a star (in certain assembler syntax)
8664 A -- print an absolute memory reference.
8665 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8666 s -- print a shift double count, followed by the assembler's argument
8667 delimiter.
8668 b -- print the QImode name of the register for the indicated operand.
8669 %b0 would print %al if operands[0] is reg 0.
8670 w -- likewise, print the HImode name of the register.
8671 k -- likewise, print the SImode name of the register.
8672 q -- likewise, print the DImode name of the register.
8673 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8674 y -- print "st(0)" instead of "st" as a register.
8675 D -- print condition for SSE cmp instruction.
8676 P -- if PIC, print an @PLT suffix.
8677 X -- don't print any sort of PIC '@' suffix for a symbol.
8678 & -- print some in-use local-dynamic symbol name.
8679 H -- print a memory address offset by 8; used for sse high-parts
8680 Y -- print condition for SSE5 com* instruction.
8681 + -- print a branch hint as 'cs' or 'ds' prefix
8682 ; -- print a semicolon (after prefixes due to bug in older gas).
8683 */
8684
8685 void
8686 print_operand (FILE *file, rtx x, int code)
8687 {
8688 if (code)
8689 {
8690 switch (code)
8691 {
8692 case '*':
8693 if (ASSEMBLER_DIALECT == ASM_ATT)
8694 putc ('*', file);
8695 return;
8696
8697 case '&':
8698 assemble_name (file, get_some_local_dynamic_name ());
8699 return;
8700
8701 case 'A':
8702 switch (ASSEMBLER_DIALECT)
8703 {
8704 case ASM_ATT:
8705 putc ('*', file);
8706 break;
8707
8708 case ASM_INTEL:
8709 /* Intel syntax. For absolute addresses, registers should not
8710 be surrounded by brackets. */
8711 if (!REG_P (x))
8712 {
8713 putc ('[', file);
8714 PRINT_OPERAND (file, x, 0);
8715 putc (']', file);
8716 return;
8717 }
8718 break;
8719
8720 default:
8721 gcc_unreachable ();
8722 }
8723
8724 PRINT_OPERAND (file, x, 0);
8725 return;
8726
8727
8728 case 'L':
8729 if (ASSEMBLER_DIALECT == ASM_ATT)
8730 putc ('l', file);
8731 return;
8732
8733 case 'W':
8734 if (ASSEMBLER_DIALECT == ASM_ATT)
8735 putc ('w', file);
8736 return;
8737
8738 case 'B':
8739 if (ASSEMBLER_DIALECT == ASM_ATT)
8740 putc ('b', file);
8741 return;
8742
8743 case 'Q':
8744 if (ASSEMBLER_DIALECT == ASM_ATT)
8745 putc ('l', file);
8746 return;
8747
8748 case 'S':
8749 if (ASSEMBLER_DIALECT == ASM_ATT)
8750 putc ('s', file);
8751 return;
8752
8753 case 'T':
8754 if (ASSEMBLER_DIALECT == ASM_ATT)
8755 putc ('t', file);
8756 return;
8757
8758 case 'z':
8759 /* 387 opcodes don't get size suffixes if the operands are
8760 registers. */
8761 if (STACK_REG_P (x))
8762 return;
8763
8764 /* Likewise if using Intel opcodes. */
8765 if (ASSEMBLER_DIALECT == ASM_INTEL)
8766 return;
8767
8768 /* Derive the opcode suffix from the size of the operand. */
8769 switch (GET_MODE_SIZE (GET_MODE (x)))
8770 {
8771 case 1:
8772 putc ('b', file);
8773 return;
8774
8775 case 2:
8776 if (MEM_P (x))
8777 {
8778 #ifdef HAVE_GAS_FILDS_FISTS
8779 putc ('s', file);
8780 #endif
8781 return;
8782 }
8783 else
8784 putc ('w', file);
8785 return;
8786
8787 case 4:
8788 if (GET_MODE (x) == SFmode)
8789 {
8790 putc ('s', file);
8791 return;
8792 }
8793 else
8794 putc ('l', file);
8795 return;
8796
8797 case 12:
8798 case 16:
8799 putc ('t', file);
8800 return;
8801
8802 case 8:
8803 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8804 {
8805 #ifdef GAS_MNEMONICS
8806 putc ('q', file);
8807 #else
8808 putc ('l', file);
8809 putc ('l', file);
8810 #endif
8811 }
8812 else
8813 putc ('l', file);
8814 return;
8815
8816 default:
8817 gcc_unreachable ();
8818 }
8819
8820 case 'b':
8821 case 'w':
8822 case 'k':
8823 case 'q':
8824 case 'h':
8825 case 'y':
8826 case 'X':
8827 case 'P':
8828 break;
8829
8830 case 's':
8831 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8832 {
8833 PRINT_OPERAND (file, x, 0);
8834 putc (',', file);
8835 }
8836 return;
8837
8838 case 'D':
8839 /* Little bit of braindamage here. The SSE compare instructions
8840 use completely different names for the comparisons than the
8841 fp conditional moves do. */
8842 switch (GET_CODE (x))
8843 {
8844 case EQ:
8845 case UNEQ:
8846 fputs ("eq", file);
8847 break;
8848 case LT:
8849 case UNLT:
8850 fputs ("lt", file);
8851 break;
8852 case LE:
8853 case UNLE:
8854 fputs ("le", file);
8855 break;
8856 case UNORDERED:
8857 fputs ("unord", file);
8858 break;
8859 case NE:
8860 case LTGT:
8861 fputs ("neq", file);
8862 break;
8863 case UNGE:
8864 case GE:
8865 fputs ("nlt", file);
8866 break;
8867 case UNGT:
8868 case GT:
8869 fputs ("nle", file);
8870 break;
8871 case ORDERED:
8872 fputs ("ord", file);
8873 break;
8874 default:
8875 gcc_unreachable ();
8876 }
8877 return;
8878 case 'O':
8879 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8880 if (ASSEMBLER_DIALECT == ASM_ATT)
8881 {
8882 switch (GET_MODE (x))
8883 {
8884 case HImode: putc ('w', file); break;
8885 case SImode:
8886 case SFmode: putc ('l', file); break;
8887 case DImode:
8888 case DFmode: putc ('q', file); break;
8889 default: gcc_unreachable ();
8890 }
8891 putc ('.', file);
8892 }
8893 #endif
8894 return;
8895 case 'C':
8896 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8897 return;
8898 case 'F':
8899 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8900 if (ASSEMBLER_DIALECT == ASM_ATT)
8901 putc ('.', file);
8902 #endif
8903 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8904 return;
8905
8906 /* Like above, but reverse condition */
8907 case 'c':
8908 /* Check to see if argument to %c is really a constant
8909 and not a condition code which needs to be reversed. */
8910 if (!COMPARISON_P (x))
8911 {
8912 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8913 return;
8914 }
8915 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8916 return;
8917 case 'f':
8918 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8919 if (ASSEMBLER_DIALECT == ASM_ATT)
8920 putc ('.', file);
8921 #endif
8922 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8923 return;
8924
8925 case 'H':
8926 /* It doesn't actually matter what mode we use here, as we're
8927 only going to use this for printing. */
8928 x = adjust_address_nv (x, DImode, 8);
8929 break;
8930
8931 case '+':
8932 {
8933 rtx x;
8934
8935 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8936 return;
8937
8938 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8939 if (x)
8940 {
8941 int pred_val = INTVAL (XEXP (x, 0));
8942
8943 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8944 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8945 {
8946 int taken = pred_val > REG_BR_PROB_BASE / 2;
8947 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8948
8949 /* Emit hints only in cases where the default branch
8950 prediction heuristics would fail. */
8951 if (taken != cputaken)
8952 {
8953 /* We use 3e (DS) prefix for taken branches and
8954 2e (CS) prefix for not taken branches. */
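/* These segment-override prefixes are interpreted as static branch
   hints by NetBurst-era (Pentium 4) processors and are simply ignored
   as redundant prefixes elsewhere.  */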
8955 if (taken)
8956 fputs ("ds ; ", file);
8957 else
8958 fputs ("cs ; ", file);
8959 }
8960 }
8961 }
8962 return;
8963 }
8964
8965 case 'Y':
8966 switch (GET_CODE (x))
8967 {
8968 case NE:
8969 fputs ("neq", file);
8970 break;
8971 case EQ:
8972 fputs ("eq", file);
8973 break;
8974 case GE:
8975 case GEU:
8976 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
8977 break;
8978 case GT:
8979 case GTU:
8980 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
8981 break;
8982 case LE:
8983 case LEU:
8984 fputs ("le", file);
8985 break;
8986 case LT:
8987 case LTU:
8988 fputs ("lt", file);
8989 break;
8990 case UNORDERED:
8991 fputs ("unord", file);
8992 break;
8993 case ORDERED:
8994 fputs ("ord", file);
8995 break;
8996 case UNEQ:
8997 fputs ("ueq", file);
8998 break;
8999 case UNGE:
9000 fputs ("nlt", file);
9001 break;
9002 case UNGT:
9003 fputs ("nle", file);
9004 break;
9005 case UNLE:
9006 fputs ("ule", file);
9007 break;
9008 case UNLT:
9009 fputs ("ult", file);
9010 break;
9011 case LTGT:
9012 fputs ("une", file);
9013 break;
9014 default:
9015 gcc_unreachable ();
9016 }
9017 return;
9018
9019 case ';':
9020 #if TARGET_MACHO
9021 fputs (" ; ", file);
9022 #else
9023 fputc (' ', file);
9024 #endif
9025 return;
9026
9027 default:
9028 output_operand_lossage ("invalid operand code '%c'", code);
9029 }
9030 }
9031
9032 if (REG_P (x))
9033 print_reg (x, code, file);
9034
9035 else if (MEM_P (x))
9036 {
9037 /* No `byte ptr' prefix for call instructions. */
9038 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
9039 {
9040 const char * size;
9041 switch (GET_MODE_SIZE (GET_MODE (x)))
9042 {
9043 case 1: size = "BYTE"; break;
9044 case 2: size = "WORD"; break;
9045 case 4: size = "DWORD"; break;
9046 case 8: size = "QWORD"; break;
9047 case 12: size = "XWORD"; break;
9048 case 16: size = "XMMWORD"; break;
9049 default:
9050 gcc_unreachable ();
9051 }
9052
9053 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9054 if (code == 'b')
9055 size = "BYTE";
9056 else if (code == 'w')
9057 size = "WORD";
9058 else if (code == 'k')
9059 size = "DWORD";
9060
9061 fputs (size, file);
9062 fputs (" PTR ", file);
9063 }
9064
9065 x = XEXP (x, 0);
9066 /* Avoid (%rip) for call operands. */
9067 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9068 && !CONST_INT_P (x))
9069 output_addr_const (file, x);
9070 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9071 output_operand_lossage ("invalid constraints for operand");
9072 else
9073 output_address (x);
9074 }
9075
9076 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9077 {
9078 REAL_VALUE_TYPE r;
9079 long l;
9080
9081 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9082 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9083
9084 if (ASSEMBLER_DIALECT == ASM_ATT)
9085 putc ('$', file);
9086 fprintf (file, "0x%08lx", l);
9087 }
9088
9089 /* These float cases don't actually occur as immediate operands. */
9090 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9091 {
9092 char dstr[30];
9093
9094 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9095 fprintf (file, "%s", dstr);
9096 }
9097
9098 else if (GET_CODE (x) == CONST_DOUBLE
9099 && GET_MODE (x) == XFmode)
9100 {
9101 char dstr[30];
9102
9103 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9104 fprintf (file, "%s", dstr);
9105 }
9106
9107 else
9108 {
9109 /* We have patterns that allow zero sets of memory, for instance.
9110 In 64-bit mode, we should probably support all 8-byte vectors,
9111 since we can in fact encode that into an immediate. */
9112 if (GET_CODE (x) == CONST_VECTOR)
9113 {
9114 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9115 x = const0_rtx;
9116 }
9117
9118 if (code != 'P')
9119 {
9120 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9121 {
9122 if (ASSEMBLER_DIALECT == ASM_ATT)
9123 putc ('$', file);
9124 }
9125 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9126 || GET_CODE (x) == LABEL_REF)
9127 {
9128 if (ASSEMBLER_DIALECT == ASM_ATT)
9129 putc ('$', file);
9130 else
9131 fputs ("OFFSET FLAT:", file);
9132 }
9133 }
9134 if (CONST_INT_P (x))
9135 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9136 else if (flag_pic)
9137 output_pic_addr_const (file, x, code);
9138 else
9139 output_addr_const (file, x);
9140 }
9141 }
9142 \f
9143 /* Print a memory operand whose address is ADDR. */
9144
9145 void
9146 print_operand_address (FILE *file, rtx addr)
9147 {
9148 struct ix86_address parts;
9149 rtx base, index, disp;
9150 int scale;
9151 int ok = ix86_decompose_address (addr, &parts);
9152
9153 gcc_assert (ok);
9154
9155 base = parts.base;
9156 index = parts.index;
9157 disp = parts.disp;
9158 scale = parts.scale;
9159
9160 switch (parts.seg)
9161 {
9162 case SEG_DEFAULT:
9163 break;
9164 case SEG_FS:
9165 case SEG_GS:
9166 if (USER_LABEL_PREFIX[0] == 0)
9167 putc ('%', file);
9168 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9169 break;
9170 default:
9171 gcc_unreachable ();
9172 }
9173
9174 if (!base && !index)
9175 {
9176 /* A displacement-only address requires special attention. */
9177
9178 if (CONST_INT_P (disp))
9179 {
9180 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9181 {
9182 if (USER_LABEL_PREFIX[0] == 0)
9183 putc ('%', file);
9184 fputs ("ds:", file);
9185 }
9186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9187 }
9188 else if (flag_pic)
9189 output_pic_addr_const (file, disp, 0);
9190 else
9191 output_addr_const (file, disp);
9192
9193 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
9194 if (TARGET_64BIT)
9195 {
9196 if (GET_CODE (disp) == CONST
9197 && GET_CODE (XEXP (disp, 0)) == PLUS
9198 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9199 disp = XEXP (XEXP (disp, 0), 0);
9200 if (GET_CODE (disp) == LABEL_REF
9201 || (GET_CODE (disp) == SYMBOL_REF
9202 && SYMBOL_REF_TLS_MODEL (disp) == 0))
9203 fputs ("(%rip)", file);
9204 }
9205 }
9206 else
9207 {
9208 if (ASSEMBLER_DIALECT == ASM_ATT)
9209 {
9210 if (disp)
9211 {
9212 if (flag_pic)
9213 output_pic_addr_const (file, disp, 0);
9214 else if (GET_CODE (disp) == LABEL_REF)
9215 output_asm_label (disp);
9216 else
9217 output_addr_const (file, disp);
9218 }
9219
9220 putc ('(', file);
9221 if (base)
9222 print_reg (base, 0, file);
9223 if (index)
9224 {
9225 putc (',', file);
9226 print_reg (index, 0, file);
9227 if (scale != 1)
9228 fprintf (file, ",%d", scale);
9229 }
9230 putc (')', file);
9231 }
9232 else
9233 {
9234 rtx offset = NULL_RTX;
9235
9236 if (disp)
9237 {
9238 /* Pull out the offset of a symbol; print any symbol itself. */
9239 if (GET_CODE (disp) == CONST
9240 && GET_CODE (XEXP (disp, 0)) == PLUS
9241 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9242 {
9243 offset = XEXP (XEXP (disp, 0), 1);
9244 disp = gen_rtx_CONST (VOIDmode,
9245 XEXP (XEXP (disp, 0), 0));
9246 }
9247
9248 if (flag_pic)
9249 output_pic_addr_const (file, disp, 0);
9250 else if (GET_CODE (disp) == LABEL_REF)
9251 output_asm_label (disp);
9252 else if (CONST_INT_P (disp))
9253 offset = disp;
9254 else
9255 output_addr_const (file, disp);
9256 }
9257
9258 putc ('[', file);
9259 if (base)
9260 {
9261 print_reg (base, 0, file);
9262 if (offset)
9263 {
9264 if (INTVAL (offset) >= 0)
9265 putc ('+', file);
9266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9267 }
9268 }
9269 else if (offset)
9270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9271 else
9272 putc ('0', file);
9273
9274 if (index)
9275 {
9276 putc ('+', file);
9277 print_reg (index, 0, file);
9278 if (scale != 1)
9279 fprintf (file, "*%d", scale);
9280 }
9281 putc (']', file);
9282 }
9283 }
9284 }
9285
9286 bool
9287 output_addr_const_extra (FILE *file, rtx x)
9288 {
9289 rtx op;
9290
9291 if (GET_CODE (x) != UNSPEC)
9292 return false;
9293
9294 op = XVECEXP (x, 0, 0);
9295 switch (XINT (x, 1))
9296 {
9297 case UNSPEC_GOTTPOFF:
9298 output_addr_const (file, op);
9299 /* FIXME: This might be @TPOFF in Sun ld. */
9300 fputs ("@GOTTPOFF", file);
9301 break;
9302 case UNSPEC_TPOFF:
9303 output_addr_const (file, op);
9304 fputs ("@TPOFF", file);
9305 break;
9306 case UNSPEC_NTPOFF:
9307 output_addr_const (file, op);
9308 if (TARGET_64BIT)
9309 fputs ("@TPOFF", file);
9310 else
9311 fputs ("@NTPOFF", file);
9312 break;
9313 case UNSPEC_DTPOFF:
9314 output_addr_const (file, op);
9315 fputs ("@DTPOFF", file);
9316 break;
9317 case UNSPEC_GOTNTPOFF:
9318 output_addr_const (file, op);
9319 if (TARGET_64BIT)
9320 fputs ("@GOTTPOFF(%rip)", file);
9321 else
9322 fputs ("@GOTNTPOFF", file);
9323 break;
9324 case UNSPEC_INDNTPOFF:
9325 output_addr_const (file, op);
9326 fputs ("@INDNTPOFF", file);
9327 break;
9328
9329 default:
9330 return false;
9331 }
9332
9333 return true;
9334 }
9335 \f
9336 /* Split one or more DImode RTL references into pairs of SImode
9337 references. The RTL can be REG, offsettable MEM, integer constant, or
9338 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9339 split and "num" is its length. lo_half and hi_half are output arrays
9340 that parallel "operands". */
9341
9342 void
9343 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9344 {
9345 while (num--)
9346 {
9347 rtx op = operands[num];
9348
9349 /* simplify_subreg refuses to split volatile memory addresses,
9350 but we still have to handle them. */
9351 if (MEM_P (op))
9352 {
9353 lo_half[num] = adjust_address (op, SImode, 0);
9354 hi_half[num] = adjust_address (op, SImode, 4);
9355 }
9356 else
9357 {
9358 lo_half[num] = simplify_gen_subreg (SImode, op,
9359 GET_MODE (op) == VOIDmode
9360 ? DImode : GET_MODE (op), 0);
9361 hi_half[num] = simplify_gen_subreg (SImode, op,
9362 GET_MODE (op) == VOIDmode
9363 ? DImode : GET_MODE (op), 4);
9364 }
9365 }
9366 }
9367 /* Split one or more TImode RTL references into pairs of DImode
9368 references. The RTL can be REG, offsettable MEM, integer constant, or
9369 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9370 split and "num" is its length. lo_half and hi_half are output arrays
9371 that parallel "operands". */
9372
9373 void
9374 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9375 {
9376 while (num--)
9377 {
9378 rtx op = operands[num];
9379
9380 /* simplify_subreg refuses to split volatile memory addresses, but we
9381 still have to handle them. */
9382 if (MEM_P (op))
9383 {
9384 lo_half[num] = adjust_address (op, DImode, 0);
9385 hi_half[num] = adjust_address (op, DImode, 8);
9386 }
9387 else
9388 {
9389 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9390 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9391 }
9392 }
9393 }
9394 \f
9395 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9396 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9397 is the expression of the binary operation. The output may either be
9398 emitted here, or returned to the caller, like all output_* functions.
9399
9400 There is no guarantee that the operands are the same mode, as they
9401 might be within FLOAT or FLOAT_EXTEND expressions. */
9402
9403 #ifndef SYSV386_COMPAT
9404 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9405 wants to fix the assemblers because that causes incompatibility
9406 with gcc. No-one wants to fix gcc because that causes
9407 incompatibility with assemblers... You can use the option of
9408 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9409 #define SYSV386_COMPAT 1
9410 #endif
9411
9412 const char *
9413 output_387_binary_op (rtx insn, rtx *operands)
9414 {
9415 static char buf[30];
9416 const char *p;
9417 const char *ssep;
9418 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9419
9420 #ifdef ENABLE_CHECKING
9421 /* Even if we do not want to check the inputs, this documents the input
9422 constraints, which helps in understanding the following code. */
9423 if (STACK_REG_P (operands[0])
9424 && ((REG_P (operands[1])
9425 && REGNO (operands[0]) == REGNO (operands[1])
9426 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9427 || (REG_P (operands[2])
9428 && REGNO (operands[0]) == REGNO (operands[2])
9429 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9430 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9431 ; /* ok */
9432 else
9433 gcc_assert (is_sse);
9434 #endif
9435
9436 switch (GET_CODE (operands[3]))
9437 {
9438 case PLUS:
9439 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9440 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9441 p = "fiadd";
9442 else
9443 p = "fadd";
9444 ssep = "add";
9445 break;
9446
9447 case MINUS:
9448 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9449 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9450 p = "fisub";
9451 else
9452 p = "fsub";
9453 ssep = "sub";
9454 break;
9455
9456 case MULT:
9457 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9458 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9459 p = "fimul";
9460 else
9461 p = "fmul";
9462 ssep = "mul";
9463 break;
9464
9465 case DIV:
9466 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9467 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9468 p = "fidiv";
9469 else
9470 p = "fdiv";
9471 ssep = "div";
9472 break;
9473
9474 default:
9475 gcc_unreachable ();
9476 }
9477
9478 if (is_sse)
9479 {
9480 strcpy (buf, ssep);
9481 if (GET_MODE (operands[0]) == SFmode)
9482 strcat (buf, "ss\t{%2, %0|%0, %2}");
9483 else
9484 strcat (buf, "sd\t{%2, %0|%0, %2}");
9485 return buf;
9486 }
9487 strcpy (buf, p);
9488
9489 switch (GET_CODE (operands[3]))
9490 {
9491 case MULT:
9492 case PLUS:
9493 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9494 {
9495 rtx temp = operands[2];
9496 operands[2] = operands[1];
9497 operands[1] = temp;
9498 }
9499
9500 /* We know operands[0] == operands[1]. */
9501
9502 if (MEM_P (operands[2]))
9503 {
9504 p = "%z2\t%2";
9505 break;
9506 }
9507
9508 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9509 {
9510 if (STACK_TOP_P (operands[0]))
9511 /* How is it that we are storing to a dead operand[2]?
9512 Well, presumably operands[1] is dead too. We can't
9513 store the result to st(0) as st(0) gets popped on this
9514 instruction. Instead store to operands[2] (which I
9515 think has to be st(1)). st(1) will be popped later.
9516 gcc <= 2.8.1 didn't have this check and generated
9517 assembly code that the Unixware assembler rejected. */
9518 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9519 else
9520 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9521 break;
9522 }
9523
9524 if (STACK_TOP_P (operands[0]))
9525 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9526 else
9527 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9528 break;
9529
9530 case MINUS:
9531 case DIV:
9532 if (MEM_P (operands[1]))
9533 {
9534 p = "r%z1\t%1";
9535 break;
9536 }
9537
9538 if (MEM_P (operands[2]))
9539 {
9540 p = "%z2\t%2";
9541 break;
9542 }
9543
9544 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9545 {
9546 #if SYSV386_COMPAT
9547 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9548 derived assemblers, confusingly reverse the direction of
9549 the operation for fsub{r} and fdiv{r} when the
9550 destination register is not st(0). The Intel assembler
9551 doesn't have this brain damage. Read !SYSV386_COMPAT to
9552 figure out what the hardware really does. */
9553 if (STACK_TOP_P (operands[0]))
9554 p = "{p\t%0, %2|rp\t%2, %0}";
9555 else
9556 p = "{rp\t%2, %0|p\t%0, %2}";
9557 #else
9558 if (STACK_TOP_P (operands[0]))
9559 /* As above for fmul/fadd, we can't store to st(0). */
9560 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9561 else
9562 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9563 #endif
9564 break;
9565 }
9566
9567 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9568 {
9569 #if SYSV386_COMPAT
9570 if (STACK_TOP_P (operands[0]))
9571 p = "{rp\t%0, %1|p\t%1, %0}";
9572 else
9573 p = "{p\t%1, %0|rp\t%0, %1}";
9574 #else
9575 if (STACK_TOP_P (operands[0]))
9576 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9577 else
9578 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9579 #endif
9580 break;
9581 }
9582
9583 if (STACK_TOP_P (operands[0]))
9584 {
9585 if (STACK_TOP_P (operands[1]))
9586 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9587 else
9588 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9589 break;
9590 }
9591 else if (STACK_TOP_P (operands[1]))
9592 {
9593 #if SYSV386_COMPAT
9594 p = "{\t%1, %0|r\t%0, %1}";
9595 #else
9596 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9597 #endif
9598 }
9599 else
9600 {
9601 #if SYSV386_COMPAT
9602 p = "{r\t%2, %0|\t%0, %2}";
9603 #else
9604 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9605 #endif
9606 }
9607 break;
9608
9609 default:
9610 gcc_unreachable ();
9611 }
9612
9613 strcat (buf, p);
9614 return buf;
9615 }
9616
9617 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
9618
9619 int
9620 ix86_mode_needed (int entity, rtx insn)
9621 {
9622 enum attr_i387_cw mode;
9623
9624 /* The mode UNINITIALIZED is used to store the control word after a
9625 function call or ASM pattern. The mode ANY specifies that the function
9626 has no requirements on the control word and makes no changes in the
9627 bits we are interested in. */
9628
9629 if (CALL_P (insn)
9630 || (NONJUMP_INSN_P (insn)
9631 && (asm_noperands (PATTERN (insn)) >= 0
9632 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9633 return I387_CW_UNINITIALIZED;
9634
9635 if (recog_memoized (insn) < 0)
9636 return I387_CW_ANY;
9637
9638 mode = get_attr_i387_cw (insn);
9639
9640 switch (entity)
9641 {
9642 case I387_TRUNC:
9643 if (mode == I387_CW_TRUNC)
9644 return mode;
9645 break;
9646
9647 case I387_FLOOR:
9648 if (mode == I387_CW_FLOOR)
9649 return mode;
9650 break;
9651
9652 case I387_CEIL:
9653 if (mode == I387_CW_CEIL)
9654 return mode;
9655 break;
9656
9657 case I387_MASK_PM:
9658 if (mode == I387_CW_MASK_PM)
9659 return mode;
9660 break;
9661
9662 default:
9663 gcc_unreachable ();
9664 }
9665
9666 return I387_CW_ANY;
9667 }
9668
9669 /* Output code to initialize the control word copies used by the trunc?f?i
9670 and rounding patterns. CURRENT_MODE is set to the current control word,
9671 while NEW_MODE is set to the new control word. */
9672
9673 void
9674 emit_i387_cw_initialization (int mode)
9675 {
9676 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9677 rtx new_mode;
9678
9679 enum ix86_stack_slot slot;
9680
9681 rtx reg = gen_reg_rtx (HImode);
9682
9683 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9684 emit_move_insn (reg, copy_rtx (stored_mode));
9685
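/* The rounding-control field of the x87 control word occupies bits 10-11
   (mask 0x0c00): 00 = round to nearest, 01 = round down, 10 = round up,
   11 = truncate.  Bit 5 (0x0020) masks the precision exception.  The
   first branch below sets the bits with 16-bit and/or operations; the
   second uses movsi_insv_1, which apparently writes the value straight
   into the high byte of the control word copy.  */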
9686 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9687 {
9688 switch (mode)
9689 {
9690 case I387_CW_TRUNC:
9691 /* round toward zero (truncate) */
9692 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9693 slot = SLOT_CW_TRUNC;
9694 break;
9695
9696 case I387_CW_FLOOR:
9697 /* round down toward -oo */
9698 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9699 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9700 slot = SLOT_CW_FLOOR;
9701 break;
9702
9703 case I387_CW_CEIL:
9704 /* round up toward +oo */
9705 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9706 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9707 slot = SLOT_CW_CEIL;
9708 break;
9709
9710 case I387_CW_MASK_PM:
9711 /* mask precision exception for nearbyint() */
9712 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9713 slot = SLOT_CW_MASK_PM;
9714 break;
9715
9716 default:
9717 gcc_unreachable ();
9718 }
9719 }
9720 else
9721 {
9722 switch (mode)
9723 {
9724 case I387_CW_TRUNC:
9725 /* round toward zero (truncate) */
9726 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9727 slot = SLOT_CW_TRUNC;
9728 break;
9729
9730 case I387_CW_FLOOR:
9731 /* round down toward -oo */
9732 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9733 slot = SLOT_CW_FLOOR;
9734 break;
9735
9736 case I387_CW_CEIL:
9737 /* round up toward +oo */
9738 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9739 slot = SLOT_CW_CEIL;
9740 break;
9741
9742 case I387_CW_MASK_PM:
9743 /* mask precision exception for nearbyint() */
9744 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9745 slot = SLOT_CW_MASK_PM;
9746 break;
9747
9748 default:
9749 gcc_unreachable ();
9750 }
9751 }
9752
9753 gcc_assert (slot < MAX_386_STACK_LOCALS);
9754
9755 new_mode = assign_386_stack_local (HImode, slot);
9756 emit_move_insn (new_mode, reg);
9757 }
9758
9759 /* Output code for INSN to convert a float to a signed int. OPERANDS
9760 are the insn operands. The output may be [HSD]Imode and the input
9761 operand may be [SDX]Fmode. */
9762
9763 const char *
9764 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9765 {
9766 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9767 int dimode_p = GET_MODE (operands[0]) == DImode;
9768 int round_mode = get_attr_i387_cw (insn);
9769
9770 /* Jump through a hoop or two for DImode, since the hardware has no
9771 non-popping instruction. We used to do this a different way, but
9772 that was somewhat fragile and broke with post-reload splitters. */
9773 if ((dimode_p || fisttp) && !stack_top_dies)
9774 output_asm_insn ("fld\t%y1", operands);
9775
9776 gcc_assert (STACK_TOP_P (operands[1]));
9777 gcc_assert (MEM_P (operands[0]));
9778 gcc_assert (GET_MODE (operands[1]) != TFmode);
9779
9780 if (fisttp)
9781 output_asm_insn ("fisttp%z0\t%0", operands);
9782 else
9783 {
9784 if (round_mode != I387_CW_ANY)
9785 output_asm_insn ("fldcw\t%3", operands);
9786 if (stack_top_dies || dimode_p)
9787 output_asm_insn ("fistp%z0\t%0", operands);
9788 else
9789 output_asm_insn ("fist%z0\t%0", operands);
9790 if (round_mode != I387_CW_ANY)
9791 output_asm_insn ("fldcw\t%2", operands);
9792 }
9793
9794 return "";
9795 }
9796
9797 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9798 have the values zero or one, indicates the ffreep insn's operand
9799 from the OPERANDS array. */
9800
9801 static const char *
9802 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9803 {
9804 if (TARGET_USE_FFREEP)
9805 #if HAVE_AS_IX86_FFREEP
9806 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9807 #else
9808 {
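/* The assembler lacks the ffreep mnemonic, so emit the raw opcode bytes
   instead: ffreep %st(N) encodes as DF C0+N, and ".word 0xcNdf" produces
   exactly those two bytes in little-endian order.  The '_' placeholder at
   index 9 of the template is overwritten below with the ASCII digit of
   the stack register number.  */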
9809 static char retval[] = ".word\t0xc_df";
9810 int regno = REGNO (operands[opno]);
9811
9812 gcc_assert (FP_REGNO_P (regno));
9813
9814 retval[9] = '0' + (regno - FIRST_STACK_REG);
9815 return retval;
9816 }
9817 #endif
9818
9819 return opno ? "fstp\t%y1" : "fstp\t%y0";
9820 }
9821
9822
9823 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9824 should be used. UNORDERED_P is true when fucom should be used. */
9825
9826 const char *
9827 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9828 {
9829 int stack_top_dies;
9830 rtx cmp_op0, cmp_op1;
9831 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9832
9833 if (eflags_p)
9834 {
9835 cmp_op0 = operands[0];
9836 cmp_op1 = operands[1];
9837 }
9838 else
9839 {
9840 cmp_op0 = operands[1];
9841 cmp_op1 = operands[2];
9842 }
9843
9844 if (is_sse)
9845 {
9846 if (GET_MODE (operands[0]) == SFmode)
9847 if (unordered_p)
9848 return "ucomiss\t{%1, %0|%0, %1}";
9849 else
9850 return "comiss\t{%1, %0|%0, %1}";
9851 else
9852 if (unordered_p)
9853 return "ucomisd\t{%1, %0|%0, %1}";
9854 else
9855 return "comisd\t{%1, %0|%0, %1}";
9856 }
9857
9858 gcc_assert (STACK_TOP_P (cmp_op0));
9859
9860 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9861
9862 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9863 {
9864 if (stack_top_dies)
9865 {
9866 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9867 return output_387_ffreep (operands, 1);
9868 }
9869 else
9870 return "ftst\n\tfnstsw\t%0";
9871 }
9872
9873 if (STACK_REG_P (cmp_op1)
9874 && stack_top_dies
9875 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9876 && REGNO (cmp_op1) != FIRST_STACK_REG)
9877 {
9878 /* If the top of the 387 stack dies, and the other operand
9879 is also a stack register that dies, then this must be a
9880 `fcompp' float compare. */
9881
9882 if (eflags_p)
9883 {
9884 /* There is no double popping fcomi variant. Fortunately,
9885 eflags is immune from the fstp's cc clobbering. */
9886 if (unordered_p)
9887 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9888 else
9889 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9890 return output_387_ffreep (operands, 0);
9891 }
9892 else
9893 {
9894 if (unordered_p)
9895 return "fucompp\n\tfnstsw\t%0";
9896 else
9897 return "fcompp\n\tfnstsw\t%0";
9898 }
9899 }
9900 else
9901 {
9902 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9903
9904 static const char * const alt[16] =
9905 {
9906 "fcom%z2\t%y2\n\tfnstsw\t%0",
9907 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9908 "fucom%z2\t%y2\n\tfnstsw\t%0",
9909 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9910
9911 "ficom%z2\t%y2\n\tfnstsw\t%0",
9912 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9913 NULL,
9914 NULL,
9915
9916 "fcomi\t{%y1, %0|%0, %y1}",
9917 "fcomip\t{%y1, %0|%0, %y1}",
9918 "fucomi\t{%y1, %0|%0, %y1}",
9919 "fucomip\t{%y1, %0|%0, %y1}",
9920
9921 NULL,
9922 NULL,
9923 NULL,
9924 NULL
9925 };
9926
9927 int mask;
9928 const char *ret;
9929
9930 mask = eflags_p << 3;
9931 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9932 mask |= unordered_p << 1;
9933 mask |= stack_top_dies;
9934
9935 gcc_assert (mask < 16);
9936 ret = alt[mask];
9937 gcc_assert (ret);
9938
9939 return ret;
9940 }
9941 }
9942
9943 void
9944 ix86_output_addr_vec_elt (FILE *file, int value)
9945 {
9946 const char *directive = ASM_LONG;
9947
9948 #ifdef ASM_QUAD
9949 if (TARGET_64BIT)
9950 directive = ASM_QUAD;
9951 #else
9952 gcc_assert (!TARGET_64BIT);
9953 #endif
9954
9955 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9956 }
9957
9958 void
9959 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9960 {
9961 const char *directive = ASM_LONG;
9962
9963 #ifdef ASM_QUAD
9964 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9965 directive = ASM_QUAD;
9966 #else
9967 gcc_assert (!TARGET_64BIT);
9968 #endif
9969 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9970 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9971 fprintf (file, "%s%s%d-%s%d\n",
9972 directive, LPREFIX, value, LPREFIX, rel);
9973 else if (HAVE_AS_GOTOFF_IN_DATA)
9974 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9975 #if TARGET_MACHO
9976 else if (TARGET_MACHO)
9977 {
9978 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9979 machopic_output_function_base_name (file);
9980 fprintf(file, "\n");
9981 }
9982 #endif
9983 else
9984 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9985 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9986 }
9987 \f
9988 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9989 for the target. */
9990
9991 void
9992 ix86_expand_clear (rtx dest)
9993 {
9994 rtx tmp;
9995
9996 /* We play register width games, which are only valid after reload. */
9997 gcc_assert (reload_completed);
9998
9999 /* Avoid HImode and its attendant prefix byte. */
10000 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10001 dest = gen_rtx_REG (SImode, REGNO (dest));
10002 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10003
10004 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10005 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10006 {
10007 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10008 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10009 }
10010
10011 emit_insn (tmp);
10012 }
10013
10014 /* X is an unchanging MEM. If it is a constant pool reference, return
10015 the constant pool rtx, else NULL. */
10016
10017 rtx
10018 maybe_get_pool_constant (rtx x)
10019 {
10020 x = ix86_delegitimize_address (XEXP (x, 0));
10021
10022 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10023 return get_pool_constant (x);
10024
10025 return NULL_RTX;
10026 }
10027
10028 void
10029 ix86_expand_move (enum machine_mode mode, rtx operands[])
10030 {
10031 int strict = (reload_in_progress || reload_completed);
10032 rtx op0, op1;
10033 enum tls_model model;
10034
10035 op0 = operands[0];
10036 op1 = operands[1];
10037
10038 if (GET_CODE (op1) == SYMBOL_REF)
10039 {
10040 model = SYMBOL_REF_TLS_MODEL (op1);
10041 if (model)
10042 {
10043 op1 = legitimize_tls_address (op1, model, true);
10044 op1 = force_operand (op1, op0);
10045 if (op1 == op0)
10046 return;
10047 }
10048 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10049 && SYMBOL_REF_DLLIMPORT_P (op1))
10050 op1 = legitimize_dllimport_symbol (op1, false);
10051 }
10052 else if (GET_CODE (op1) == CONST
10053 && GET_CODE (XEXP (op1, 0)) == PLUS
10054 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10055 {
10056 rtx addend = XEXP (XEXP (op1, 0), 1);
10057 rtx symbol = XEXP (XEXP (op1, 0), 0);
10058 rtx tmp = NULL;
10059
10060 model = SYMBOL_REF_TLS_MODEL (symbol);
10061 if (model)
10062 tmp = legitimize_tls_address (symbol, model, true);
10063 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10064 && SYMBOL_REF_DLLIMPORT_P (symbol))
10065 tmp = legitimize_dllimport_symbol (symbol, true);
10066
10067 if (tmp)
10068 {
10069 tmp = force_operand (tmp, NULL);
10070 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10071 op0, 1, OPTAB_DIRECT);
10072 if (tmp == op0)
10073 return;
10074 }
10075 }
10076
10077 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10078 {
10079 if (TARGET_MACHO && !TARGET_64BIT)
10080 {
10081 #if TARGET_MACHO
10082 if (MACHOPIC_PURE)
10083 {
10084 rtx temp = ((reload_in_progress
10085 || ((op0 && REG_P (op0))
10086 && mode == Pmode))
10087 ? op0 : gen_reg_rtx (Pmode));
10088 op1 = machopic_indirect_data_reference (op1, temp);
10089 op1 = machopic_legitimize_pic_address (op1, mode,
10090 temp == op1 ? 0 : temp);
10091 }
10092 else if (MACHOPIC_INDIRECT)
10093 op1 = machopic_indirect_data_reference (op1, 0);
10094 if (op0 == op1)
10095 return;
10096 #endif
10097 }
10098 else
10099 {
10100 if (MEM_P (op0))
10101 op1 = force_reg (Pmode, op1);
10102 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10103 {
10104 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10105 op1 = legitimize_pic_address (op1, reg);
10106 if (op0 == op1)
10107 return;
10108 }
10109 }
10110 }
10111 else
10112 {
10113 if (MEM_P (op0)
10114 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10115 || !push_operand (op0, mode))
10116 && MEM_P (op1))
10117 op1 = force_reg (mode, op1);
10118
10119 if (push_operand (op0, mode)
10120 && ! general_no_elim_operand (op1, mode))
10121 op1 = copy_to_mode_reg (mode, op1);
10122
10123 /* Force large constants in 64-bit compilation into a register
10124 to get them CSEed. */
10125 if (TARGET_64BIT && mode == DImode
10126 && immediate_operand (op1, mode)
10127 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10128 && !register_operand (op0, mode)
10129 && optimize && !reload_completed && !reload_in_progress)
10130 op1 = copy_to_mode_reg (mode, op1);
10131
10132 if (FLOAT_MODE_P (mode))
10133 {
10134 /* If we are loading a floating point constant to a register,
10135 force the value to memory now, since we'll get better code
10136 out the back end. */
10137
10138 if (strict)
10139 ;
10140 else if (GET_CODE (op1) == CONST_DOUBLE)
10141 {
10142 op1 = validize_mem (force_const_mem (mode, op1));
10143 if (!register_operand (op0, mode))
10144 {
10145 rtx temp = gen_reg_rtx (mode);
10146 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10147 emit_move_insn (op0, temp);
10148 return;
10149 }
10150 }
10151 }
10152 }
10153
10154 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10155 }
10156
10157 void
10158 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10159 {
10160 rtx op0 = operands[0], op1 = operands[1];
10161 unsigned int align = GET_MODE_ALIGNMENT (mode);
10162
10163 /* Force constants other than zero into memory. We do not know how
10164 the instructions used to build constants modify the upper 64 bits
10165 of the register; once we have that information we may be able
10166 to handle some of them more efficiently. */
10167 if ((reload_in_progress | reload_completed) == 0
10168 && register_operand (op0, mode)
10169 && (CONSTANT_P (op1)
10170 || (GET_CODE (op1) == SUBREG
10171 && CONSTANT_P (SUBREG_REG (op1))))
10172 && standard_sse_constant_p (op1) <= 0)
10173 op1 = validize_mem (force_const_mem (mode, op1));
10174
10175 /* TDmode values are passed as TImode on the stack. TImode values
10176 are moved via xmm registers, and moving them to the stack can result in
10177 unaligned memory access. Use ix86_expand_vector_move_misalign()
10178 if the memory operand is not aligned correctly. */
10179 if (can_create_pseudo_p ()
10180 && (mode == TImode) && !TARGET_64BIT
10181 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10182 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10183 {
10184 rtx tmp[2];
10185
10186 /* ix86_expand_vector_move_misalign() does not like constants ... */
10187 if (CONSTANT_P (op1)
10188 || (GET_CODE (op1) == SUBREG
10189 && CONSTANT_P (SUBREG_REG (op1))))
10190 op1 = validize_mem (force_const_mem (mode, op1));
10191
10192 /* ... nor both arguments in memory. */
10193 if (!register_operand (op0, mode)
10194 && !register_operand (op1, mode))
10195 op1 = force_reg (mode, op1);
10196
10197 tmp[0] = op0; tmp[1] = op1;
10198 ix86_expand_vector_move_misalign (mode, tmp);
10199 return;
10200 }
10201
10202 /* Make operand1 a register if it isn't already. */
10203 if (can_create_pseudo_p ()
10204 && !register_operand (op0, mode)
10205 && !register_operand (op1, mode))
10206 {
10207 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10208 return;
10209 }
10210
10211 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10212 }
10213
10214 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10215 straight to ix86_expand_vector_move. */
10216 /* Code generation for scalar reg-reg moves of single and double precision data:
10217 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10218 movaps reg, reg
10219 else
10220 movss reg, reg
10221 if (x86_sse_partial_reg_dependency == true)
10222 movapd reg, reg
10223 else
10224 movsd reg, reg
10225
10226 Code generation for scalar loads of double precision data:
10227 if (x86_sse_split_regs == true)
10228 movlpd mem, reg (gas syntax)
10229 else
10230 movsd mem, reg
10231
10232 Code generation for unaligned packed loads of single precision data
10233 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10234 if (x86_sse_unaligned_move_optimal)
10235 movups mem, reg
10236
10237 if (x86_sse_partial_reg_dependency == true)
10238 {
10239 xorps reg, reg
10240 movlps mem, reg
10241 movhps mem+8, reg
10242 }
10243 else
10244 {
10245 movlps mem, reg
10246 movhps mem+8, reg
10247 }
10248
10249 Code generation for unaligned packed loads of double precision data
10250 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10251 if (x86_sse_unaligned_move_optimal)
10252 movupd mem, reg
10253
10254 if (x86_sse_split_regs == true)
10255 {
10256 movlpd mem, reg
10257 movhpd mem+8, reg
10258 }
10259 else
10260 {
10261 movsd mem, reg
10262 movhpd mem+8, reg
10263 }
10264 */
10265
10266 void
10267 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10268 {
10269 rtx op0, op1, m;
10270
10271 op0 = operands[0];
10272 op1 = operands[1];
10273
10274 if (MEM_P (op1))
10275 {
10276 /* If we're optimizing for size, movups is the smallest. */
10277 if (optimize_size)
10278 {
10279 op0 = gen_lowpart (V4SFmode, op0);
10280 op1 = gen_lowpart (V4SFmode, op1);
10281 emit_insn (gen_sse_movups (op0, op1));
10282 return;
10283 }
10284
10285 /* ??? If we have typed data, then it would appear that using
10286 movdqu is the only way to get unaligned data loaded with
10287 integer type. */
10288 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10289 {
10290 op0 = gen_lowpart (V16QImode, op0);
10291 op1 = gen_lowpart (V16QImode, op1);
10292 emit_insn (gen_sse2_movdqu (op0, op1));
10293 return;
10294 }
10295
10296 if (TARGET_SSE2 && mode == V2DFmode)
10297 {
10298 rtx zero;
10299
10300 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10301 {
10302 op0 = gen_lowpart (V2DFmode, op0);
10303 op1 = gen_lowpart (V2DFmode, op1);
10304 emit_insn (gen_sse2_movupd (op0, op1));
10305 return;
10306 }
10307
10308 /* When SSE registers are split into halves, we can avoid
10309 writing to the top half twice. */
10310 if (TARGET_SSE_SPLIT_REGS)
10311 {
10312 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10313 zero = op0;
10314 }
10315 else
10316 {
10317 /* ??? Not sure about the best option for the Intel chips.
10318 The following would seem to satisfy; the register is
10319 entirely cleared, breaking the dependency chain. We
10320 then store to the upper half, with a dependency depth
10321 of one. A rumor has it that Intel recommends two movsd
10322 followed by an unpacklpd, but this is unconfirmed. And
10323 given that the dependency depth of the unpacklpd would
10324 still be one, I'm not sure why this would be better. */
10325 zero = CONST0_RTX (V2DFmode);
10326 }
10327
10328 m = adjust_address (op1, DFmode, 0);
10329 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10330 m = adjust_address (op1, DFmode, 8);
10331 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10332 }
10333 else
10334 {
10335 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10336 {
10337 op0 = gen_lowpart (V4SFmode, op0);
10338 op1 = gen_lowpart (V4SFmode, op1);
10339 emit_insn (gen_sse_movups (op0, op1));
10340 return;
10341 }
10342
10343 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10344 emit_move_insn (op0, CONST0_RTX (mode));
10345 else
10346 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10347
10348 if (mode != V4SFmode)
10349 op0 = gen_lowpart (V4SFmode, op0);
10350 m = adjust_address (op1, V2SFmode, 0);
10351 emit_insn (gen_sse_loadlps (op0, op0, m));
10352 m = adjust_address (op1, V2SFmode, 8);
10353 emit_insn (gen_sse_loadhps (op0, op0, m));
10354 }
10355 }
10356 else if (MEM_P (op0))
10357 {
10358 /* If we're optimizing for size, movups is the smallest. */
10359 if (optimize_size)
10360 {
10361 op0 = gen_lowpart (V4SFmode, op0);
10362 op1 = gen_lowpart (V4SFmode, op1);
10363 emit_insn (gen_sse_movups (op0, op1));
10364 return;
10365 }
10366
10367 /* ??? Similar to above, only less clear because of quote
10368 typeless stores unquote. */
10369 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10370 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10371 {
10372 op0 = gen_lowpart (V16QImode, op0);
10373 op1 = gen_lowpart (V16QImode, op1);
10374 emit_insn (gen_sse2_movdqu (op0, op1));
10375 return;
10376 }
10377
10378 if (TARGET_SSE2 && mode == V2DFmode)
10379 {
10380 m = adjust_address (op0, DFmode, 0);
10381 emit_insn (gen_sse2_storelpd (m, op1));
10382 m = adjust_address (op0, DFmode, 8);
10383 emit_insn (gen_sse2_storehpd (m, op1));
10384 }
10385 else
10386 {
10387 if (mode != V4SFmode)
10388 op1 = gen_lowpart (V4SFmode, op1);
10389 m = adjust_address (op0, V2SFmode, 0);
10390 emit_insn (gen_sse_storelps (m, op1));
10391 m = adjust_address (op0, V2SFmode, 8);
10392 emit_insn (gen_sse_storehps (m, op1));
10393 }
10394 }
10395 else
10396 gcc_unreachable ();
10397 }
10398
10399 /* Expand a push in MODE. This is some mode for which we do not support
10400 proper push instructions, at least from the registers that we expect
10401 the value to live in. */
10402
10403 void
10404 ix86_expand_push (enum machine_mode mode, rtx x)
10405 {
10406 rtx tmp;
10407
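/* Open-code the push: decrement the stack pointer by the full size of
   MODE, then store X at the new stack top.  */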
10408 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10409 GEN_INT (-GET_MODE_SIZE (mode)),
10410 stack_pointer_rtx, 1, OPTAB_DIRECT);
10411 if (tmp != stack_pointer_rtx)
10412 emit_move_insn (stack_pointer_rtx, tmp);
10413
10414 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10415 emit_move_insn (tmp, x);
10416 }
10417
10418 /* Helper function of ix86_fixup_binary_operands to canonicalize
10419 operand order. Returns true if the operands should be swapped. */
10420
10421 static bool
10422 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10423 rtx operands[])
10424 {
10425 rtx dst = operands[0];
10426 rtx src1 = operands[1];
10427 rtx src2 = operands[2];
10428
10429 /* If the operation is not commutative, we can't do anything. */
10430 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10431 return false;
10432
10433 /* Highest priority is that src1 should match dst. */
10434 if (rtx_equal_p (dst, src1))
10435 return false;
10436 if (rtx_equal_p (dst, src2))
10437 return true;
10438
10439 /* Next highest priority is that immediate constants come second. */
10440 if (immediate_operand (src2, mode))
10441 return false;
10442 if (immediate_operand (src1, mode))
10443 return true;
10444
10445 /* Lowest priority is that memory references should come second. */
10446 if (MEM_P (src2))
10447 return false;
10448 if (MEM_P (src1))
10449 return true;
10450
10451 return false;
10452 }
10453
10454
10455 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10456 destination to use for the operation. If different from the true
10457 destination in operands[0], a copy operation will be required. */
10458
10459 rtx
10460 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10461 rtx operands[])
10462 {
10463 rtx dst = operands[0];
10464 rtx src1 = operands[1];
10465 rtx src2 = operands[2];
10466
10467 /* Canonicalize operand order. */
10468 if (ix86_swap_binary_operands_p (code, mode, operands))
10469 {
10470 rtx temp = src1;
10471 src1 = src2;
10472 src2 = temp;
10473 }
10474
10475 /* Both source operands cannot be in memory. */
10476 if (MEM_P (src1) && MEM_P (src2))
10477 {
10478 /* Optimization: Only read from memory once. */
10479 if (rtx_equal_p (src1, src2))
10480 {
10481 src2 = force_reg (mode, src2);
10482 src1 = src2;
10483 }
10484 else
10485 src2 = force_reg (mode, src2);
10486 }
10487
10488 /* If the destination is memory, and we do not have matching source
10489 operands, do things in registers. */
10490 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10491 dst = gen_reg_rtx (mode);
10492
10493 /* Source 1 cannot be a constant. */
10494 if (CONSTANT_P (src1))
10495 src1 = force_reg (mode, src1);
10496
10497 /* Source 1 cannot be a non-matching memory. */
10498 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10499 src1 = force_reg (mode, src1);
10500
10501 operands[1] = src1;
10502 operands[2] = src2;
10503 return dst;
10504 }
10505
10506 /* Similarly, but assume that the destination has already been
10507 set up properly. */
10508
10509 void
10510 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10511 enum machine_mode mode, rtx operands[])
10512 {
10513 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10514 gcc_assert (dst == operands[0]);
10515 }
10516
10517 /* Attempt to expand a binary operator. Make the expansion closer to the
10518 actual machine than just general_operand, which would allow 3 separate
10519 memory references (one output, two input) in a single insn. */
10520
10521 void
10522 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10523 rtx operands[])
10524 {
10525 rtx src1, src2, dst, op, clob;
10526
10527 dst = ix86_fixup_binary_operands (code, mode, operands);
10528 src1 = operands[1];
10529 src2 = operands[2];
10530
10531 /* Emit the instruction. */
10532
10533 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10534 if (reload_in_progress)
10535 {
10536 /* Reload doesn't know about the flags register, and doesn't know that
10537 it doesn't want to clobber it. We can only do this with PLUS. */
10538 gcc_assert (code == PLUS);
10539 emit_insn (op);
10540 }
10541 else
10542 {
10543 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10544 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10545 }
10546
10547 /* Fix up the destination if needed. */
10548 if (dst != operands[0])
10549 emit_move_insn (operands[0], dst);
10550 }
10551
10552 /* Return TRUE or FALSE depending on whether the binary operator meets the
10553 appropriate constraints. */
10554
10555 int
10556 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10557 rtx operands[3])
10558 {
10559 rtx dst = operands[0];
10560 rtx src1 = operands[1];
10561 rtx src2 = operands[2];
10562
10563 /* Both source operands cannot be in memory. */
10564 if (MEM_P (src1) && MEM_P (src2))
10565 return 0;
10566
10567 /* Canonicalize operand order for commutative operators. */
10568 if (ix86_swap_binary_operands_p (code, mode, operands))
10569 {
10570 rtx temp = src1;
10571 src1 = src2;
10572 src2 = temp;
10573 }
10574
10575 /* If the destination is memory, we must have a matching source operand. */
10576 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10577 return 0;
10578
10579 /* Source 1 cannot be a constant. */
10580 if (CONSTANT_P (src1))
10581 return 0;
10582
10583 /* Source 1 cannot be a non-matching memory. */
10584 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10585 return 0;
10586
10587 return 1;
10588 }
10589
10590 /* Attempt to expand a unary operator. Make the expansion closer to the
10591 actual machine than just general_operand, which would allow 2 separate
10592 memory references (one output, one input) in a single insn. */
10593
10594 void
10595 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10596 rtx operands[])
10597 {
10598 int matching_memory;
10599 rtx src, dst, op, clob;
10600
10601 dst = operands[0];
10602 src = operands[1];
10603
10604 /* If the destination is memory, and we do not have matching source
10605 operands, do things in registers. */
10606 matching_memory = 0;
10607 if (MEM_P (dst))
10608 {
10609 if (rtx_equal_p (dst, src))
10610 matching_memory = 1;
10611 else
10612 dst = gen_reg_rtx (mode);
10613 }
10614
10615 /* When the source operand is memory, the destination must match. */
10616 if (MEM_P (src) && !matching_memory)
10617 src = force_reg (mode, src);
10618
10619 /* Emit the instruction. */
10620
10621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10622 if (reload_in_progress || code == NOT)
10623 {
10624 /* Reload doesn't know about the flags register, and doesn't know that
10625 it doesn't want to clobber it. */
10626 gcc_assert (code == NOT);
10627 emit_insn (op);
10628 }
10629 else
10630 {
10631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10633 }
10634
10635 /* Fix up the destination if needed. */
10636 if (dst != operands[0])
10637 emit_move_insn (operands[0], dst);
10638 }
10639
10640 /* Return TRUE or FALSE depending on whether the unary operator meets the
10641 appropriate constraints. */
10642
10643 int
10644 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10645 enum machine_mode mode ATTRIBUTE_UNUSED,
10646 rtx operands[2] ATTRIBUTE_UNUSED)
10647 {
10648 /* If one of the operands is memory, the source and destination must match. */
10649 if ((MEM_P (operands[0])
10650 || MEM_P (operands[1]))
10651 && ! rtx_equal_p (operands[0], operands[1]))
10652 return FALSE;
10653 return TRUE;
10654 }
10655
10656 /* Post-reload splitter for converting an SF or DFmode value in an
10657 SSE register into an unsigned SImode value. */
10658
10659 void
10660 ix86_split_convert_uns_si_sse (rtx operands[])
10661 {
10662 enum machine_mode vecmode;
10663 rtx value, large, zero_or_two31, input, two31, x;
10664
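/* Strategy: compare the value against 2**31; where it is at least that
   large, subtract 2**31 before the signed conversion and put the
   0x80000000 bit back into the integer result afterwards (done below by
   shifting the comparison mask left by 31 and XORing it in).  */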
10665 large = operands[1];
10666 zero_or_two31 = operands[2];
10667 input = operands[3];
10668 two31 = operands[4];
10669 vecmode = GET_MODE (large);
10670 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10671
10672 /* Load up the value into the low element. We must ensure that the other
10673 elements are valid floats -- zero is the easiest such value. */
10674 if (MEM_P (input))
10675 {
10676 if (vecmode == V4SFmode)
10677 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10678 else
10679 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10680 }
10681 else
10682 {
10683 input = gen_rtx_REG (vecmode, REGNO (input));
10684 emit_move_insn (value, CONST0_RTX (vecmode));
10685 if (vecmode == V4SFmode)
10686 emit_insn (gen_sse_movss (value, value, input));
10687 else
10688 emit_insn (gen_sse2_movsd (value, value, input));
10689 }
10690
10691 emit_move_insn (large, two31);
10692 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10693
10694 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10695 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10696
10697 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10698 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10699
10700 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10701 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10702
10703 large = gen_rtx_REG (V4SImode, REGNO (large));
10704 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10705
10706 x = gen_rtx_REG (V4SImode, REGNO (value));
10707 if (vecmode == V4SFmode)
10708 emit_insn (gen_sse2_cvttps2dq (x, value));
10709 else
10710 emit_insn (gen_sse2_cvttpd2dq (x, value));
10711 value = x;
10712
10713 emit_insn (gen_xorv4si3 (value, value, large));
10714 }
10715
10716 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10717 Expects the 64-bit DImode to be supplied in a pair of integral
10718 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10719 -mfpmath=sse, !optimize_size only. */
10720
10721 void
10722 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10723 {
10724 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10725 rtx int_xmm, fp_xmm;
10726 rtx biases, exponents;
10727 rtx x;
10728
10729 int_xmm = gen_reg_rtx (V4SImode);
10730 if (TARGET_INTER_UNIT_MOVES)
10731 emit_insn (gen_movdi_to_sse (int_xmm, input));
10732 else if (TARGET_SSE_SPLIT_REGS)
10733 {
10734 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10735 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10736 }
10737 else
10738 {
10739 x = gen_reg_rtx (V2DImode);
10740 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10741 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10742 }
10743
10744 x = gen_rtx_CONST_VECTOR (V4SImode,
10745 gen_rtvec (4, GEN_INT (0x43300000UL),
10746 GEN_INT (0x45300000UL),
10747 const0_rtx, const0_rtx));
10748 exponents = validize_mem (force_const_mem (V4SImode, x));
10749
10750 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10751 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10752
10753 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10754 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10755 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10756 (0x1.0p84 + double(fp_value_hi_xmm)).
10757 Note these exponents differ by 32. */
10758
10759 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10760
10761 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10762 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10763 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10764 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10765 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10766 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10767 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10768 biases = validize_mem (force_const_mem (V2DFmode, biases));
10769 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10770
10771 /* Add the upper and lower DFmode values together. */
10772 if (TARGET_SSE3)
10773 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10774 else
10775 {
10776 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10777 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10778 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10779 }
10780
10781 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10782 }
10783
10784 /* Convert an unsigned SImode value into a DFmode. Only currently used
10785 for SSE, but applicable anywhere. */
10786
10787 void
10788 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10789 {
10790 REAL_VALUE_TYPE TWO31r;
10791 rtx x, fp;
10792
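/* Add -2**31 in SImode so the value reads as input - 2**31 when treated
   as a signed integer, convert that exactly to DFmode, and then add 2**31
   back; DFmode has enough precision for the final addition to be exact.  */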
10793 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10794 NULL, 1, OPTAB_DIRECT);
10795
10796 fp = gen_reg_rtx (DFmode);
10797 emit_insn (gen_floatsidf2 (fp, x));
10798
10799 real_ldexp (&TWO31r, &dconst1, 31);
10800 x = const_double_from_real_value (TWO31r, DFmode);
10801
10802 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10803 if (x != target)
10804 emit_move_insn (target, x);
10805 }
10806
10807 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10808 32-bit mode; otherwise we have a direct convert instruction. */
10809
10810 void
10811 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10812 {
10813 REAL_VALUE_TYPE TWO32r;
10814 rtx fp_lo, fp_hi, x;
10815
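/* Compute (double) (signed) hi * 2**32 + (double) (unsigned) lo,
   converting the high and low SImode halves separately.  */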
10816 fp_lo = gen_reg_rtx (DFmode);
10817 fp_hi = gen_reg_rtx (DFmode);
10818
10819 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10820
10821 real_ldexp (&TWO32r, &dconst1, 32);
10822 x = const_double_from_real_value (TWO32r, DFmode);
10823 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10824
10825 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10826
10827 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10828 0, OPTAB_DIRECT);
10829 if (x != target)
10830 emit_move_insn (target, x);
10831 }
10832
10833 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10834 For x86_32, -mfpmath=sse, !optimize_size only. */
10835 void
10836 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10837 {
10838 REAL_VALUE_TYPE ONE16r;
10839 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10840
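/* Split the value into 16-bit halves; each half converts to SFmode
   exactly, and hi * 2**16 is exact as well, so the only rounding happens
   in the final addition.  */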
10841 real_ldexp (&ONE16r, &dconst1, 16);
10842 x = const_double_from_real_value (ONE16r, SFmode);
10843 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10844 NULL, 0, OPTAB_DIRECT);
10845 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10846 NULL, 0, OPTAB_DIRECT);
10847 fp_hi = gen_reg_rtx (SFmode);
10848 fp_lo = gen_reg_rtx (SFmode);
10849 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10850 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10851 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10852 0, OPTAB_DIRECT);
10853 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10854 0, OPTAB_DIRECT);
10855 if (!rtx_equal_p (target, fp_hi))
10856 emit_move_insn (target, fp_hi);
10857 }
10858
10859 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10860 then replicate the value for all elements of the vector
10861 register. */
10862
10863 rtx
10864 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10865 {
10866 rtvec v;
10867 switch (mode)
10868 {
10869 case SImode:
10870 gcc_assert (vect);
10871 v = gen_rtvec (4, value, value, value, value);
10872 return gen_rtx_CONST_VECTOR (V4SImode, v);
10873
10874 case DImode:
10875 gcc_assert (vect);
10876 v = gen_rtvec (2, value, value);
10877 return gen_rtx_CONST_VECTOR (V2DImode, v);
10878
10879 case SFmode:
10880 if (vect)
10881 v = gen_rtvec (4, value, value, value, value);
10882 else
10883 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10884 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10885 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10886
10887 case DFmode:
10888 if (vect)
10889 v = gen_rtvec (2, value, value);
10890 else
10891 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10892 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10893
10894 default:
10895 gcc_unreachable ();
10896 }
10897 }
10898
10899 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10900 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10901 for an SSE register. If VECT is true, then replicate the mask for
10902 all elements of the vector register. If INVERT is true, then create
10903 a mask excluding the sign bit. */
10904
10905 rtx
10906 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10907 {
10908 enum machine_mode vec_mode, imode;
10909 HOST_WIDE_INT hi, lo;
10910 int shift = 63;
10911 rtx v;
10912 rtx mask;
10913
10914 /* Find the sign bit, sign extended to 2*HWI. */
10915 switch (mode)
10916 {
10917 case SImode:
10918 case SFmode:
10919 imode = SImode;
10920 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10921 lo = 0x80000000, hi = lo < 0;
10922 break;
10923
10924 case DImode:
10925 case DFmode:
10926 imode = DImode;
10927 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10928 if (HOST_BITS_PER_WIDE_INT >= 64)
10929 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10930 else
10931 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10932 break;
10933
10934 case TImode:
10935 case TFmode:
10936 imode = TImode;
10937 vec_mode = VOIDmode;
10938 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10939 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10940 break;
10941
10942 default:
10943 gcc_unreachable ();
10944 }
10945
10946 if (invert)
10947 lo = ~lo, hi = ~hi;
10948
10949 /* Force this value into the low part of a fp vector constant. */
10950 mask = immed_double_const (lo, hi, imode);
10951 mask = gen_lowpart (mode, mask);
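/* For SFmode, for instance, this yields the bit pattern 0x80000000 viewed
as a float (-0.0) when INVERT is false, selecting only the sign bit, or
0x7fffffff when INVERT is true, selecting everything but the sign bit.  */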
10952
10953 if (vec_mode == VOIDmode)
10954 return force_reg (mode, mask);
10955
10956 v = ix86_build_const_vector (mode, vect, mask);
10957 return force_reg (vec_mode, v);
10958 }
10959
10960 /* Generate code for floating point ABS or NEG. */
10961
10962 void
10963 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10964 rtx operands[])
10965 {
10966 rtx mask, set, use, clob, dst, src;
10967 bool matching_memory;
10968 bool use_sse = false;
10969 bool vector_mode = VECTOR_MODE_P (mode);
10970 enum machine_mode elt_mode = mode;
10971
10972 if (vector_mode)
10973 {
10974 elt_mode = GET_MODE_INNER (mode);
10975 use_sse = true;
10976 }
10977 else if (mode == TFmode)
10978 use_sse = true;
10979 else if (TARGET_SSE_MATH)
10980 use_sse = SSE_FLOAT_MODE_P (mode);
10981
10982 /* NEG and ABS performed with SSE use bitwise mask operations.
10983 Create the appropriate mask now. */
10984 if (use_sse)
10985 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10986 else
10987 mask = NULL_RTX;
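/* With SSE, NEG is an XOR with the sign-bit mask and ABS an AND with the
inverted mask (sign bit clear), which is why the mask built above is
inverted for ABS.  */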
10988
10989 dst = operands[0];
10990 src = operands[1];
10991
10992 /* If the destination is memory, and we don't have matching source
10993 operands or we're using the x87, do things in registers. */
10994 matching_memory = false;
10995 if (MEM_P (dst))
10996 {
10997 if (use_sse && rtx_equal_p (dst, src))
10998 matching_memory = true;
10999 else
11000 dst = gen_reg_rtx (mode);
11001 }
11002 if (MEM_P (src) && !matching_memory)
11003 src = force_reg (mode, src);
11004
11005 if (vector_mode)
11006 {
11007 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11008 set = gen_rtx_SET (VOIDmode, dst, set);
11009 emit_insn (set);
11010 }
11011 else
11012 {
11013 set = gen_rtx_fmt_e (code, mode, src);
11014 set = gen_rtx_SET (VOIDmode, dst, set);
11015 if (mask)
11016 {
11017 use = gen_rtx_USE (VOIDmode, mask);
11018 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11019 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11020 gen_rtvec (3, set, use, clob)));
11021 }
11022 else
11023 emit_insn (set);
11024 }
11025
11026 if (dst != operands[0])
11027 emit_move_insn (operands[0], dst);
11028 }
11029
11030 /* Expand a copysign operation. Special case operand 0 being a constant. */
11031
11032 void
11033 ix86_expand_copysign (rtx operands[])
11034 {
11035 enum machine_mode mode, vmode;
11036 rtx dest, op0, op1, mask, nmask;
11037
11038 dest = operands[0];
11039 op0 = operands[1];
11040 op1 = operands[2];
11041
11042 mode = GET_MODE (dest);
11043 vmode = mode == SFmode ? V4SFmode : V2DFmode;
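/* copysign is computed bitwise: the magnitude bits come from op0 and the
sign bit from op1, i.e. (op0 & ~signmask) | (op1 & signmask).  */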
11044
11045 if (GET_CODE (op0) == CONST_DOUBLE)
11046 {
11047 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11048
11049 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11050 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11051
11052 if (mode == SFmode || mode == DFmode)
11053 {
11054 if (op0 == CONST0_RTX (mode))
11055 op0 = CONST0_RTX (vmode);
11056 else
11057 {
11058 rtvec v;
11059
11060 if (mode == SFmode)
11061 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11062 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11063 else
11064 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11065 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11066 }
11067 }
11068
11069 mask = ix86_build_signbit_mask (mode, 0, 0);
11070
11071 if (mode == SFmode)
11072 copysign_insn = gen_copysignsf3_const;
11073 else if (mode == DFmode)
11074 copysign_insn = gen_copysigndf3_const;
11075 else
11076 copysign_insn = gen_copysigntf3_const;
11077
11078 emit_insn (copysign_insn (dest, op0, op1, mask));
11079 }
11080 else
11081 {
11082 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11083
11084 nmask = ix86_build_signbit_mask (mode, 0, 1);
11085 mask = ix86_build_signbit_mask (mode, 0, 0);
11086
11087 if (mode == SFmode)
11088 copysign_insn = gen_copysignsf3_var;
11089 else if (mode == DFmode)
11090 copysign_insn = gen_copysigndf3_var;
11091 else
11092 copysign_insn = gen_copysigntf3_var;
11093
11094 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11095 }
11096 }
11097
11098 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11099 be a constant, and so has already been expanded into a vector constant. */
11100
11101 void
11102 ix86_split_copysign_const (rtx operands[])
11103 {
11104 enum machine_mode mode, vmode;
11105 rtx dest, op0, op1, mask, x;
11106
11107 dest = operands[0];
11108 op0 = operands[1];
11109 op1 = operands[2];
11110 mask = operands[3];
11111
11112 mode = GET_MODE (dest);
11113 vmode = GET_MODE (mask);
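/* OP1 is expected to live in DEST already (note it is not otherwise used
below): ANDing DEST with the sign-bit mask keeps only OP1's sign, and
IORing in OP0 supplies the nonnegative constant magnitude.  */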
11114
11115 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11116 x = gen_rtx_AND (vmode, dest, mask);
11117 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11118
11119 if (op0 != CONST0_RTX (vmode))
11120 {
11121 x = gen_rtx_IOR (vmode, dest, op0);
11122 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11123 }
11124 }
11125
11126 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11127 so we have to do two masks. */
11128
11129 void
11130 ix86_split_copysign_var (rtx operands[])
11131 {
11132 enum machine_mode mode, vmode;
11133 rtx dest, scratch, op0, op1, mask, nmask, x;
11134
11135 dest = operands[0];
11136 scratch = operands[1];
11137 op0 = operands[2];
11138 op1 = operands[3];
11139 nmask = operands[4];
11140 mask = operands[5];
11141
11142 mode = GET_MODE (dest);
11143 vmode = GET_MODE (mask);
11144
11145 if (rtx_equal_p (op0, op1))
11146 {
11147 /* Shouldn't happen often (it's useless, obviously), but when it does
11148 we'd generate incorrect code if we continue below. */
11149 emit_move_insn (dest, op0);
11150 return;
11151 }
11152
11153 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11154 {
11155 gcc_assert (REGNO (op1) == REGNO (scratch));
11156
11157 x = gen_rtx_AND (vmode, scratch, mask);
11158 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11159
11160 dest = mask;
11161 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11162 x = gen_rtx_NOT (vmode, dest);
11163 x = gen_rtx_AND (vmode, x, op0);
11164 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11165 }
11166 else
11167 {
11168 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11169 {
11170 x = gen_rtx_AND (vmode, scratch, mask);
11171 }
11172 else /* alternative 2,4 */
11173 {
11174 gcc_assert (REGNO (mask) == REGNO (scratch));
11175 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11176 x = gen_rtx_AND (vmode, scratch, op1);
11177 }
11178 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11179
11180 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11181 {
11182 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11183 x = gen_rtx_AND (vmode, dest, nmask);
11184 }
11185 else /* alternative 3,4 */
11186 {
11187 gcc_assert (REGNO (nmask) == REGNO (dest));
11188 dest = nmask;
11189 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11190 x = gen_rtx_AND (vmode, dest, op0);
11191 }
11192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11193 }
11194
11195 x = gen_rtx_IOR (vmode, dest, scratch);
11196 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11197 }
11198
11199 /* Return TRUE or FALSE depending on whether the first SET in INSN
11200 has source and destination with matching CC modes, and that the
11201 CC mode is at least as constrained as REQ_MODE. */
11202
11203 int
11204 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11205 {
11206 rtx set;
11207 enum machine_mode set_mode;
11208
11209 set = PATTERN (insn);
11210 if (GET_CODE (set) == PARALLEL)
11211 set = XVECEXP (set, 0, 0);
11212 gcc_assert (GET_CODE (set) == SET);
11213 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11214
11215 set_mode = GET_MODE (SET_DEST (set));
11216 switch (set_mode)
11217 {
11218 case CCNOmode:
11219 if (req_mode != CCNOmode
11220 && (req_mode != CCmode
11221 || XEXP (SET_SRC (set), 1) != const0_rtx))
11222 return 0;
11223 break;
11224 case CCmode:
11225 if (req_mode == CCGCmode)
11226 return 0;
11227 /* FALLTHRU */
11228 case CCGCmode:
11229 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11230 return 0;
11231 /* FALLTHRU */
11232 case CCGOCmode:
11233 if (req_mode == CCZmode)
11234 return 0;
11235 /* FALLTHRU */
11236 case CCZmode:
11237 break;
11238
11239 default:
11240 gcc_unreachable ();
11241 }
11242
11243 return (GET_MODE (SET_SRC (set)) == set_mode);
11244 }
11245
11246 /* Generate insn patterns to do an integer compare of OPERANDS. */
11247
11248 static rtx
11249 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11250 {
11251 enum machine_mode cmpmode;
11252 rtx tmp, flags;
11253
11254 cmpmode = SELECT_CC_MODE (code, op0, op1);
11255 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11256
11257 /* This is very simple, but making the interface the same as in the
11258 FP case makes the rest of the code easier. */
11259 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11260 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11261
11262 /* Return the test that should be put into the flags user, i.e.
11263 the bcc, scc, or cmov instruction. */
11264 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11265 }
11266
11267 /* Figure out whether to use ordered or unordered fp comparisons.
11268 Return the appropriate mode to use. */
11269
11270 enum machine_mode
11271 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11272 {
11273 /* ??? In order to make all comparisons reversible, we do all comparisons
11274 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11275 all forms of trapping and nontrapping comparisons, we can make inequality
11276 comparisons trapping again, since it results in better code when using
11277 FCOM based compares. */
11278 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11279 }
11280
11281 enum machine_mode
11282 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11283 {
11284 enum machine_mode mode = GET_MODE (op0);
11285
11286 if (SCALAR_FLOAT_MODE_P (mode))
11287 {
11288 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11289 return ix86_fp_compare_mode (code);
11290 }
11291
11292 switch (code)
11293 {
11294 /* Only zero flag is needed. */
11295 case EQ: /* ZF=0 */
11296 case NE: /* ZF!=0 */
11297 return CCZmode;
11298 /* Codes needing carry flag. */
11299 case GEU: /* CF=0 */
11300 case LTU: /* CF=1 */
11301 /* Detect overflow checks. They need just the carry flag. */
11302 if (GET_CODE (op0) == PLUS
11303 && rtx_equal_p (op1, XEXP (op0, 0)))
11304 return CCCmode;
11305 else
11306 return CCmode;
11307 case GTU: /* CF=0 & ZF=0 */
11308 case LEU: /* CF=1 | ZF=1 */
11309 /* Detect overflow checks. They need just the carry flag. */
11310 if (GET_CODE (op0) == MINUS
11311 && rtx_equal_p (op1, XEXP (op0, 0)))
11312 return CCCmode;
11313 else
11314 return CCmode;
11315 /* Codes possibly doable only with sign flag when
11316 comparing against zero. */
11317 case GE: /* SF=OF or SF=0 */
11318 case LT: /* SF<>OF or SF=1 */
11319 if (op1 == const0_rtx)
11320 return CCGOCmode;
11321 else
11322 /* For other cases Carry flag is not required. */
11323 return CCGCmode;
11324 /* Codes doable only with the sign flag when comparing
11325 against zero, but we lack a jump instruction for it,
11326 so we need to use relational tests against overflow,
11327 which thus needs to be zero. */
11328 case GT: /* ZF=0 & SF=OF */
11329 case LE: /* ZF=1 | SF<>OF */
11330 if (op1 == const0_rtx)
11331 return CCNOmode;
11332 else
11333 return CCGCmode;
11334 /* The strcmp pattern does (use flags), and combine may ask us for the
11335 proper mode. */
11336 case USE:
11337 return CCmode;
11338 default:
11339 gcc_unreachable ();
11340 }
11341 }
11342
11343 /* Return the fixed registers used for condition codes. */
11344
11345 static bool
11346 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11347 {
11348 *p1 = FLAGS_REG;
11349 *p2 = FPSR_REG;
11350 return true;
11351 }
11352
11353 /* If two condition code modes are compatible, return a condition code
11354 mode which is compatible with both. Otherwise, return
11355 VOIDmode. */
11356
11357 static enum machine_mode
11358 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11359 {
11360 if (m1 == m2)
11361 return m1;
11362
11363 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11364 return VOIDmode;
11365
11366 if ((m1 == CCGCmode && m2 == CCGOCmode)
11367 || (m1 == CCGOCmode && m2 == CCGCmode))
11368 return CCGCmode;
11369
11370 switch (m1)
11371 {
11372 default:
11373 gcc_unreachable ();
11374
11375 case CCmode:
11376 case CCGCmode:
11377 case CCGOCmode:
11378 case CCNOmode:
11379 case CCAmode:
11380 case CCCmode:
11381 case CCOmode:
11382 case CCSmode:
11383 case CCZmode:
11384 switch (m2)
11385 {
11386 default:
11387 return VOIDmode;
11388
11389 case CCmode:
11390 case CCGCmode:
11391 case CCGOCmode:
11392 case CCNOmode:
11393 case CCAmode:
11394 case CCCmode:
11395 case CCOmode:
11396 case CCSmode:
11397 case CCZmode:
11398 return CCmode;
11399 }
11400
11401 case CCFPmode:
11402 case CCFPUmode:
11403 /* These are only compatible with themselves, which we already
11404 checked above. */
11405 return VOIDmode;
11406 }
11407 }
11408
11409 /* Split comparison code CODE into comparisons we can do using branch
11410 instructions. BYPASS_CODE is the comparison code for the branch that
11411 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
11412 is not required, its value is set to UNKNOWN.
11413 We never require more than two branches. */
11414
11415 void
11416 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11417 enum rtx_code *first_code,
11418 enum rtx_code *second_code)
11419 {
11420 *first_code = code;
11421 *bypass_code = UNKNOWN;
11422 *second_code = UNKNOWN;
11423
11424 /* The fcomi comparison sets flags as follows:
11425
11426 cmp ZF PF CF
11427 > 0 0 0
11428 < 0 0 1
11429 = 1 0 0
11430 un 1 1 1 */
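/* For example, a plain LT must be false for unordered operands, but the
bare CF=1 test (UNLT) would then be true; so LT below emits UNLT as
FIRST_CODE guarded by an UNORDERED bypass branch around it.  */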
11431
11432 switch (code)
11433 {
11434 case GT: /* GTU - CF=0 & ZF=0 */
11435 case GE: /* GEU - CF=0 */
11436 case ORDERED: /* PF=0 */
11437 case UNORDERED: /* PF=1 */
11438 case UNEQ: /* EQ - ZF=1 */
11439 case UNLT: /* LTU - CF=1 */
11440 case UNLE: /* LEU - CF=1 | ZF=1 */
11441 case LTGT: /* EQ - ZF=0 */
11442 break;
11443 case LT: /* LTU - CF=1 - fails on unordered */
11444 *first_code = UNLT;
11445 *bypass_code = UNORDERED;
11446 break;
11447 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11448 *first_code = UNLE;
11449 *bypass_code = UNORDERED;
11450 break;
11451 case EQ: /* EQ - ZF=1 - fails on unordered */
11452 *first_code = UNEQ;
11453 *bypass_code = UNORDERED;
11454 break;
11455 case NE: /* NE - ZF=0 - fails on unordered */
11456 *first_code = LTGT;
11457 *second_code = UNORDERED;
11458 break;
11459 case UNGE: /* GEU - CF=0 - fails on unordered */
11460 *first_code = GE;
11461 *second_code = UNORDERED;
11462 break;
11463 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11464 *first_code = GT;
11465 *second_code = UNORDERED;
11466 break;
11467 default:
11468 gcc_unreachable ();
11469 }
11470 if (!TARGET_IEEE_FP)
11471 {
11472 *second_code = UNKNOWN;
11473 *bypass_code = UNKNOWN;
11474 }
11475 }
11476
11477 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11478 All of the following functions use the number of instructions as a cost metric.
11479 In the future this should be tweaked to compute bytes for optimize_size and
11480 take into account the performance of various instructions on various CPUs. */
11481 static int
11482 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11483 {
11484 if (!TARGET_IEEE_FP)
11485 return 4;
11486 /* The cost of code output by ix86_expand_fp_compare. */
11487 switch (code)
11488 {
11489 case UNLE:
11490 case UNLT:
11491 case LTGT:
11492 case GT:
11493 case GE:
11494 case UNORDERED:
11495 case ORDERED:
11496 case UNEQ:
11497 return 4;
11498 break;
11499 case LT:
11500 case NE:
11501 case EQ:
11502 case UNGE:
11503 return 5;
11504 break;
11505 case LE:
11506 case UNGT:
11507 return 6;
11508 break;
11509 default:
11510 gcc_unreachable ();
11511 }
11512 }
11513
11514 /* Return cost of comparison done using fcomi operation.
11515 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11516 static int
11517 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11518 {
11519 enum rtx_code bypass_code, first_code, second_code;
11520 /* Return arbitrarily high cost when instruction is not supported - this
11521 prevents gcc from using it. */
11522 if (!TARGET_CMOVE)
11523 return 1024;
11524 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11525 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11526 }
11527
11528 /* Return cost of comparison done using sahf operation.
11529 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11530 static int
11531 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11532 {
11533 enum rtx_code bypass_code, first_code, second_code;
11534 /* Return arbitrarily high cost when instruction is not preferred - this
11535 keeps gcc from using it. */
11536 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11537 return 1024;
11538 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11539 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11540 }
11541
11542 /* Compute cost of the comparison done using any method.
11543 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11544 static int
11545 ix86_fp_comparison_cost (enum rtx_code code)
11546 {
11547 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11548 int min;
11549
11550 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11551 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11552
11553 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11554 if (min > sahf_cost)
11555 min = sahf_cost;
11556 if (min > fcomi_cost)
11557 min = fcomi_cost;
11558 return min;
11559 }
11560
11561 /* Return true if we should use an FCOMI instruction for this
11562 fp comparison. */
11563
11564 int
11565 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11566 {
11567 enum rtx_code swapped_code = swap_condition (code);
11568
11569 return ((ix86_fp_comparison_cost (code)
11570 == ix86_fp_comparison_fcomi_cost (code))
11571 || (ix86_fp_comparison_cost (swapped_code)
11572 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11573 }
11574
11575 /* Swap, force into registers, or otherwise massage the two operands
11576 to a fp comparison. The operands are updated in place; the new
11577 comparison code is returned. */
11578
11579 static enum rtx_code
11580 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11581 {
11582 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11583 rtx op0 = *pop0, op1 = *pop1;
11584 enum machine_mode op_mode = GET_MODE (op0);
11585 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11586
11587 /* All of the unordered compare instructions only work on registers.
11588 The same is true of the fcomi compare instructions. The XFmode
11589 compare instructions require registers except when comparing
11590 against zero or when converting operand 1 from fixed point to
11591 floating point. */
11592
11593 if (!is_sse
11594 && (fpcmp_mode == CCFPUmode
11595 || (op_mode == XFmode
11596 && ! (standard_80387_constant_p (op0) == 1
11597 || standard_80387_constant_p (op1) == 1)
11598 && GET_CODE (op1) != FLOAT)
11599 || ix86_use_fcomi_compare (code)))
11600 {
11601 op0 = force_reg (op_mode, op0);
11602 op1 = force_reg (op_mode, op1);
11603 }
11604 else
11605 {
11606 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11607 things around if they appear profitable, otherwise force op0
11608 into a register. */
11609
11610 if (standard_80387_constant_p (op0) == 0
11611 || (MEM_P (op0)
11612 && ! (standard_80387_constant_p (op1) == 0
11613 || MEM_P (op1))))
11614 {
11615 rtx tmp;
11616 tmp = op0, op0 = op1, op1 = tmp;
11617 code = swap_condition (code);
11618 }
11619
11620 if (!REG_P (op0))
11621 op0 = force_reg (op_mode, op0);
11622
11623 if (CONSTANT_P (op1))
11624 {
11625 int tmp = standard_80387_constant_p (op1);
11626 if (tmp == 0)
11627 op1 = validize_mem (force_const_mem (op_mode, op1));
11628 else if (tmp == 1)
11629 {
11630 if (TARGET_CMOVE)
11631 op1 = force_reg (op_mode, op1);
11632 }
11633 else
11634 op1 = force_reg (op_mode, op1);
11635 }
11636 }
11637
11638 /* Try to rearrange the comparison to make it cheaper. */
11639 if (ix86_fp_comparison_cost (code)
11640 > ix86_fp_comparison_cost (swap_condition (code))
11641 && (REG_P (op1) || can_create_pseudo_p ()))
11642 {
11643 rtx tmp;
11644 tmp = op0, op0 = op1, op1 = tmp;
11645 code = swap_condition (code);
11646 if (!REG_P (op0))
11647 op0 = force_reg (op_mode, op0);
11648 }
11649
11650 *pop0 = op0;
11651 *pop1 = op1;
11652 return code;
11653 }
11654
11655 /* Convert comparison codes we use to represent FP comparison to integer
11656 code that will result in proper branch. Return UNKNOWN if no such code
11657 is available. */
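/* The flags produced by fcomi or fnstsw+sahf mimic an unsigned integer
comparison (see the table in ix86_fp_comparison_codes), hence GT maps to
GTU, UNLT to LTU, and so on.  */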
11658
11659 enum rtx_code
11660 ix86_fp_compare_code_to_integer (enum rtx_code code)
11661 {
11662 switch (code)
11663 {
11664 case GT:
11665 return GTU;
11666 case GE:
11667 return GEU;
11668 case ORDERED:
11669 case UNORDERED:
11670 return code;
11671 break;
11672 case UNEQ:
11673 return EQ;
11674 break;
11675 case UNLT:
11676 return LTU;
11677 break;
11678 case UNLE:
11679 return LEU;
11680 break;
11681 case LTGT:
11682 return NE;
11683 break;
11684 default:
11685 return UNKNOWN;
11686 }
11687 }
11688
11689 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11690
11691 static rtx
11692 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11693 rtx *second_test, rtx *bypass_test)
11694 {
11695 enum machine_mode fpcmp_mode, intcmp_mode;
11696 rtx tmp, tmp2;
11697 int cost = ix86_fp_comparison_cost (code);
11698 enum rtx_code bypass_code, first_code, second_code;
11699
11700 fpcmp_mode = ix86_fp_compare_mode (code);
11701 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11702
11703 if (second_test)
11704 *second_test = NULL_RTX;
11705 if (bypass_test)
11706 *bypass_test = NULL_RTX;
11707
11708 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11709
11710 /* Do fcomi/sahf based test when profitable. */
11711 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11712 && (bypass_code == UNKNOWN || bypass_test)
11713 && (second_code == UNKNOWN || second_test))
11714 {
11715 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11716 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11717 tmp);
11718 if (TARGET_CMOVE)
11719 emit_insn (tmp);
11720 else
11721 {
11722 gcc_assert (TARGET_SAHF);
11723
11724 if (!scratch)
11725 scratch = gen_reg_rtx (HImode);
11726 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11727
11728 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11729 }
11730
11731 /* The FP codes work out to act like unsigned. */
11732 intcmp_mode = fpcmp_mode;
11733 code = first_code;
11734 if (bypass_code != UNKNOWN)
11735 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11736 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11737 const0_rtx);
11738 if (second_code != UNKNOWN)
11739 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11740 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11741 const0_rtx);
11742 }
11743 else
11744 {
11745 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11746 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11747 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11748 if (!scratch)
11749 scratch = gen_reg_rtx (HImode);
11750 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11751
11752 /* In the unordered case, we have to check C2 for NaNs, which
11753 doesn't happen to work out to anything nice combination-wise.
11754 So do some bit twiddling on the value we've got in AH to come
11755 up with an appropriate set of condition codes. */
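/* After fnstsw, AH holds the FPU condition codes: C0 in bit 0 (0x01), C2
in bit 2 (0x04) and C3 in bit 6 (0x40), which sahf maps to CF, PF and ZF
respectively; the constant 0x45 tests all three at once.  */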
11756
11757 intcmp_mode = CCNOmode;
11758 switch (code)
11759 {
11760 case GT:
11761 case UNGT:
11762 if (code == GT || !TARGET_IEEE_FP)
11763 {
11764 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11765 code = EQ;
11766 }
11767 else
11768 {
11769 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11770 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11771 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11772 intcmp_mode = CCmode;
11773 code = GEU;
11774 }
11775 break;
11776 case LT:
11777 case UNLT:
11778 if (code == LT && TARGET_IEEE_FP)
11779 {
11780 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11781 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11782 intcmp_mode = CCmode;
11783 code = EQ;
11784 }
11785 else
11786 {
11787 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11788 code = NE;
11789 }
11790 break;
11791 case GE:
11792 case UNGE:
11793 if (code == GE || !TARGET_IEEE_FP)
11794 {
11795 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11796 code = EQ;
11797 }
11798 else
11799 {
11800 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11801 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11802 GEN_INT (0x01)));
11803 code = NE;
11804 }
11805 break;
11806 case LE:
11807 case UNLE:
11808 if (code == LE && TARGET_IEEE_FP)
11809 {
11810 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11811 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11812 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11813 intcmp_mode = CCmode;
11814 code = LTU;
11815 }
11816 else
11817 {
11818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11819 code = NE;
11820 }
11821 break;
11822 case EQ:
11823 case UNEQ:
11824 if (code == EQ && TARGET_IEEE_FP)
11825 {
11826 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11827 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11828 intcmp_mode = CCmode;
11829 code = EQ;
11830 }
11831 else
11832 {
11833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11834 code = NE;
11835 break;
11836 }
11837 break;
11838 case NE:
11839 case LTGT:
11840 if (code == NE && TARGET_IEEE_FP)
11841 {
11842 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11843 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11844 GEN_INT (0x40)));
11845 code = NE;
11846 }
11847 else
11848 {
11849 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11850 code = EQ;
11851 }
11852 break;
11853
11854 case UNORDERED:
11855 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11856 code = NE;
11857 break;
11858 case ORDERED:
11859 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11860 code = EQ;
11861 break;
11862
11863 default:
11864 gcc_unreachable ();
11865 }
11866 }
11867
11868 /* Return the test that should be put into the flags user, i.e.
11869 the bcc, scc, or cmov instruction. */
11870 return gen_rtx_fmt_ee (code, VOIDmode,
11871 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11872 const0_rtx);
11873 }
11874
11875 rtx
11876 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11877 {
11878 rtx op0, op1, ret;
11879 op0 = ix86_compare_op0;
11880 op1 = ix86_compare_op1;
11881
11882 if (second_test)
11883 *second_test = NULL_RTX;
11884 if (bypass_test)
11885 *bypass_test = NULL_RTX;
11886
11887 if (ix86_compare_emitted)
11888 {
11889 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11890 ix86_compare_emitted = NULL_RTX;
11891 }
11892 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11893 {
11894 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11895 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11896 second_test, bypass_test);
11897 }
11898 else
11899 ret = ix86_expand_int_compare (code, op0, op1);
11900
11901 return ret;
11902 }
11903
11904 /* Return true if the CODE will result in a nontrivial jump sequence. */
11905 bool
11906 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11907 {
11908 enum rtx_code bypass_code, first_code, second_code;
11909 if (!TARGET_CMOVE)
11910 return true;
11911 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11912 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11913 }
11914
11915 void
11916 ix86_expand_branch (enum rtx_code code, rtx label)
11917 {
11918 rtx tmp;
11919
11920 /* If we have emitted a compare insn, go straight to simple.
11921 ix86_expand_compare won't emit anything if ix86_compare_emitted
11922 is non NULL. */
11923 if (ix86_compare_emitted)
11924 goto simple;
11925
11926 switch (GET_MODE (ix86_compare_op0))
11927 {
11928 case QImode:
11929 case HImode:
11930 case SImode:
11931 simple:
11932 tmp = ix86_expand_compare (code, NULL, NULL);
11933 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11934 gen_rtx_LABEL_REF (VOIDmode, label),
11935 pc_rtx);
11936 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11937 return;
11938
11939 case SFmode:
11940 case DFmode:
11941 case XFmode:
11942 {
11943 rtvec vec;
11944 int use_fcomi;
11945 enum rtx_code bypass_code, first_code, second_code;
11946
11947 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11948 &ix86_compare_op1);
11949
11950 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11951
11952 /* Check whether we will use the natural sequence with one jump. If
11953 so, we can expand the jump early. Otherwise delay expansion by
11954 creating a compound insn so as not to confuse the optimizers. */
11955 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
11956 {
11957 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11958 gen_rtx_LABEL_REF (VOIDmode, label),
11959 pc_rtx, NULL_RTX, NULL_RTX);
11960 }
11961 else
11962 {
11963 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11964 ix86_compare_op0, ix86_compare_op1);
11965 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11966 gen_rtx_LABEL_REF (VOIDmode, label),
11967 pc_rtx);
11968 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11969
11970 use_fcomi = ix86_use_fcomi_compare (code);
11971 vec = rtvec_alloc (3 + !use_fcomi);
11972 RTVEC_ELT (vec, 0) = tmp;
11973 RTVEC_ELT (vec, 1)
11974 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
11975 RTVEC_ELT (vec, 2)
11976 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
11977 if (! use_fcomi)
11978 RTVEC_ELT (vec, 3)
11979 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11980
11981 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11982 }
11983 return;
11984 }
11985
11986 case DImode:
11987 if (TARGET_64BIT)
11988 goto simple;
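/* FALLTHRU */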
11989 case TImode:
11990 /* Expand DImode branch into multiple compare+branch. */
11991 {
11992 rtx lo[2], hi[2], label2;
11993 enum rtx_code code1, code2, code3;
11994 enum machine_mode submode;
11995
11996 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11997 {
11998 tmp = ix86_compare_op0;
11999 ix86_compare_op0 = ix86_compare_op1;
12000 ix86_compare_op1 = tmp;
12001 code = swap_condition (code);
12002 }
12003 if (GET_MODE (ix86_compare_op0) == DImode)
12004 {
12005 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12006 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12007 submode = SImode;
12008 }
12009 else
12010 {
12011 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12012 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12013 submode = DImode;
12014 }
12015
12016 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12017 avoid two branches. This costs one extra insn, so disable when
12018 optimizing for size. */
12019
12020 if ((code == EQ || code == NE)
12021 && (!optimize_size
12022 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12023 {
12024 rtx xor0, xor1;
12025
12026 xor1 = hi[0];
12027 if (hi[1] != const0_rtx)
12028 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12029 NULL_RTX, 0, OPTAB_WIDEN);
12030
12031 xor0 = lo[0];
12032 if (lo[1] != const0_rtx)
12033 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12034 NULL_RTX, 0, OPTAB_WIDEN);
12035
12036 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12037 NULL_RTX, 0, OPTAB_WIDEN);
12038
12039 ix86_compare_op0 = tmp;
12040 ix86_compare_op1 = const0_rtx;
12041 ix86_expand_branch (code, label);
12042 return;
12043 }
12044
12045 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
12046 op1 is a constant, and the low word is zero, then we can just
12047 examine the high word. */
12048
12049 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
12050 switch (code)
12051 {
12052 case LT: case LTU: case GE: case GEU:
12053 ix86_compare_op0 = hi[0];
12054 ix86_compare_op1 = hi[1];
12055 ix86_expand_branch (code, label);
12056 return;
12057 default:
12058 break;
12059 }
12060
12061 /* Otherwise, we need two or three jumps. */
12062
12063 label2 = gen_label_rtx ();
12064
12065 code1 = code;
12066 code2 = swap_condition (code);
12067 code3 = unsigned_condition (code);
12068
12069 switch (code)
12070 {
12071 case LT: case GT: case LTU: case GTU:
12072 break;
12073
12074 case LE: code1 = LT; code2 = GT; break;
12075 case GE: code1 = GT; code2 = LT; break;
12076 case LEU: code1 = LTU; code2 = GTU; break;
12077 case GEU: code1 = GTU; code2 = LTU; break;
12078
12079 case EQ: code1 = UNKNOWN; code2 = NE; break;
12080 case NE: code2 = UNKNOWN; break;
12081
12082 default:
12083 gcc_unreachable ();
12084 }
12085
12086 /*
12087 * a < b =>
12088 * if (hi(a) < hi(b)) goto true;
12089 * if (hi(a) > hi(b)) goto false;
12090 * if (lo(a) < lo(b)) goto true;
12091 * false:
12092 */
12093
12094 ix86_compare_op0 = hi[0];
12095 ix86_compare_op1 = hi[1];
12096
12097 if (code1 != UNKNOWN)
12098 ix86_expand_branch (code1, label);
12099 if (code2 != UNKNOWN)
12100 ix86_expand_branch (code2, label2);
12101
12102 ix86_compare_op0 = lo[0];
12103 ix86_compare_op1 = lo[1];
12104 ix86_expand_branch (code3, label);
12105
12106 if (code2 != UNKNOWN)
12107 emit_label (label2);
12108 return;
12109 }
12110
12111 default:
12112 gcc_unreachable ();
12113 }
12114 }
12115
12116 /* Split branch based on floating point condition. */
12117 void
12118 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12119 rtx target1, rtx target2, rtx tmp, rtx pushed)
12120 {
12121 rtx second, bypass;
12122 rtx label = NULL_RTX;
12123 rtx condition;
12124 int bypass_probability = -1, second_probability = -1, probability = -1;
12125 rtx i;
12126
12127 if (target2 != pc_rtx)
12128 {
12129 rtx tmp = target2;
12130 code = reverse_condition_maybe_unordered (code);
12131 target2 = target1;
12132 target1 = tmp;
12133 }
12134
12135 condition = ix86_expand_fp_compare (code, op1, op2,
12136 tmp, &second, &bypass);
12137
12138 /* Remove pushed operand from stack. */
12139 if (pushed)
12140 ix86_free_from_memory (GET_MODE (pushed));
12141
12142 if (split_branch_probability >= 0)
12143 {
12144 /* Distribute the probabilities across the jumps.
12145 Assume the BYPASS and SECOND to be always test
12146 for UNORDERED. */
12147 probability = split_branch_probability;
12148
12149 /* A value of 1 is low enough that there is no need to update the
12150 probability. Later we may run some experiments and see
12151 if unordered values are more frequent in practice. */
12152 if (bypass)
12153 bypass_probability = 1;
12154 if (second)
12155 second_probability = 1;
12156 }
12157 if (bypass != NULL_RTX)
12158 {
12159 label = gen_label_rtx ();
12160 i = emit_jump_insn (gen_rtx_SET
12161 (VOIDmode, pc_rtx,
12162 gen_rtx_IF_THEN_ELSE (VOIDmode,
12163 bypass,
12164 gen_rtx_LABEL_REF (VOIDmode,
12165 label),
12166 pc_rtx)));
12167 if (bypass_probability >= 0)
12168 REG_NOTES (i)
12169 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12170 GEN_INT (bypass_probability),
12171 REG_NOTES (i));
12172 }
12173 i = emit_jump_insn (gen_rtx_SET
12174 (VOIDmode, pc_rtx,
12175 gen_rtx_IF_THEN_ELSE (VOIDmode,
12176 condition, target1, target2)));
12177 if (probability >= 0)
12178 REG_NOTES (i)
12179 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12180 GEN_INT (probability),
12181 REG_NOTES (i));
12182 if (second != NULL_RTX)
12183 {
12184 i = emit_jump_insn (gen_rtx_SET
12185 (VOIDmode, pc_rtx,
12186 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12187 target2)));
12188 if (second_probability >= 0)
12189 REG_NOTES (i)
12190 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12191 GEN_INT (second_probability),
12192 REG_NOTES (i));
12193 }
12194 if (label != NULL_RTX)
12195 emit_label (label);
12196 }
12197
12198 int
12199 ix86_expand_setcc (enum rtx_code code, rtx dest)
12200 {
12201 rtx ret, tmp, tmpreg, equiv;
12202 rtx second_test, bypass_test;
12203
12204 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12205 return 0; /* FAIL */
12206
12207 gcc_assert (GET_MODE (dest) == QImode);
12208
12209 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12210 PUT_MODE (ret, QImode);
12211
12212 tmp = dest;
12213 tmpreg = dest;
12214
12215 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12216 if (bypass_test || second_test)
12217 {
12218 rtx test = second_test;
12219 int bypass = 0;
12220 rtx tmp2 = gen_reg_rtx (QImode);
12221 if (bypass_test)
12222 {
12223 gcc_assert (!second_test);
12224 test = bypass_test;
12225 bypass = 1;
12226 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12227 }
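/* A bypass test names cases (unordered operands) where the result must be
false, so its reversed form is ANDed in below; a second test names an
additional way for the result to be true, so it is ORed in.  */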
12228 PUT_MODE (test, QImode);
12229 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12230
12231 if (bypass)
12232 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12233 else
12234 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12235 }
12236
12237 /* Attach a REG_EQUAL note describing the comparison result. */
12238 if (ix86_compare_op0 && ix86_compare_op1)
12239 {
12240 equiv = simplify_gen_relational (code, QImode,
12241 GET_MODE (ix86_compare_op0),
12242 ix86_compare_op0, ix86_compare_op1);
12243 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12244 }
12245
12246 return 1; /* DONE */
12247 }
12248
12249 /* Expand comparison setting or clearing carry flag. Return true when
12250 successful and set pop for the operation. */
12251 static bool
12252 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12253 {
12254 enum machine_mode mode =
12255 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12256
12257 /* Do not handle DImode compares that go through special path. */
12258 if (mode == (TARGET_64BIT ? TImode : DImode))
12259 return false;
12260
12261 if (SCALAR_FLOAT_MODE_P (mode))
12262 {
12263 rtx second_test = NULL, bypass_test = NULL;
12264 rtx compare_op, compare_seq;
12265
12266 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12267
12268 /* Shortcut: the following common codes never translate
12269 into carry flag compares. */
12270 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12271 || code == ORDERED || code == UNORDERED)
12272 return false;
12273
12274 /* These comparisons require zero flag; swap operands so they won't. */
12275 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12276 && !TARGET_IEEE_FP)
12277 {
12278 rtx tmp = op0;
12279 op0 = op1;
12280 op1 = tmp;
12281 code = swap_condition (code);
12282 }
12283
12284 /* Try to expand the comparison and verify that we end up with
12285 a carry-flag-based comparison.  This fails only when we decide
12286 to expand the comparison using arithmetic, which is not a
12287 common scenario.  */
12288 start_sequence ();
12289 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12290 &second_test, &bypass_test);
12291 compare_seq = get_insns ();
12292 end_sequence ();
12293
12294 if (second_test || bypass_test)
12295 return false;
12296
12297 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12298 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12299 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12300 else
12301 code = GET_CODE (compare_op);
12302
12303 if (code != LTU && code != GEU)
12304 return false;
12305
12306 emit_insn (compare_seq);
12307 *pop = compare_op;
12308 return true;
12309 }
12310
12311 if (!INTEGRAL_MODE_P (mode))
12312 return false;
12313
12314 switch (code)
12315 {
12316 case LTU:
12317 case GEU:
12318 break;
12319
12320 /* Convert a==0 into (unsigned)a<1. */
12321 case EQ:
12322 case NE:
12323 if (op1 != const0_rtx)
12324 return false;
12325 op1 = const1_rtx;
12326 code = (code == EQ ? LTU : GEU);
12327 break;
12328
12329 /* Convert a>b into b<a or a>=b-1. */
12330 case GTU:
12331 case LEU:
12332 if (CONST_INT_P (op1))
12333 {
12334 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12335 /* Bail out on overflow.  We could still swap the operands, but that
12336 would force loading of the constant into a register. */
12337 if (op1 == const0_rtx
12338 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12339 return false;
12340 code = (code == GTU ? GEU : LTU);
12341 }
12342 else
12343 {
12344 rtx tmp = op1;
12345 op1 = op0;
12346 op0 = tmp;
12347 code = (code == GTU ? LTU : GEU);
12348 }
12349 break;
12350
12351 /* Convert a>=0 into (unsigned)a<0x80000000. */
12352 case LT:
12353 case GE:
12354 if (mode == DImode || op1 != const0_rtx)
12355 return false;
12356 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12357 code = (code == LT ? GEU : LTU);
12358 break;
12359 case LE:
12360 case GT:
12361 if (mode == DImode || op1 != constm1_rtx)
12362 return false;
12363 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12364 code = (code == LE ? GEU : LTU);
12365 break;
12366
12367 default:
12368 return false;
12369 }
12370 /* Swapping operands may cause a constant to appear as the first operand. */
12371 if (!nonimmediate_operand (op0, VOIDmode))
12372 {
12373 if (!can_create_pseudo_p ())
12374 return false;
12375 op0 = force_reg (mode, op0);
12376 }
12377 ix86_compare_op0 = op0;
12378 ix86_compare_op1 = op1;
12379 *pop = ix86_expand_compare (code, NULL, NULL);
12380 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12381 return true;
12382 }
12383
12384 int
12385 ix86_expand_int_movcc (rtx operands[])
12386 {
12387 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12388 rtx compare_seq, compare_op;
12389 rtx second_test, bypass_test;
12390 enum machine_mode mode = GET_MODE (operands[0]);
12391 bool sign_bit_compare_p = false;
12392
12393 start_sequence ();
12394 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12395 compare_seq = get_insns ();
12396 end_sequence ();
12397
12398 compare_code = GET_CODE (compare_op);
12399
12400 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12401 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12402 sign_bit_compare_p = true;
12403
12404 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12405 HImode insns, we'd be swallowed in word prefix ops. */
12406
12407 if ((mode != HImode || TARGET_FAST_PREFIX)
12408 && (mode != (TARGET_64BIT ? TImode : DImode))
12409 && CONST_INT_P (operands[2])
12410 && CONST_INT_P (operands[3]))
12411 {
12412 rtx out = operands[0];
12413 HOST_WIDE_INT ct = INTVAL (operands[2]);
12414 HOST_WIDE_INT cf = INTVAL (operands[3]);
12415 HOST_WIDE_INT diff;
12416
12417 diff = ct - cf;
12418 /* Sign bit compares are better done using shifts than by using
12419 sbb. */
12420 if (sign_bit_compare_p
12421 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12422 ix86_compare_op1, &compare_op))
12423 {
12424 /* Detect overlap between destination and compare sources. */
12425 rtx tmp = out;
12426
12427 if (!sign_bit_compare_p)
12428 {
12429 bool fpcmp = false;
12430
12431 compare_code = GET_CODE (compare_op);
12432
12433 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12434 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12435 {
12436 fpcmp = true;
12437 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12438 }
12439
12440 /* To simplify the rest of the code, restrict to the GEU case. */
12441 if (compare_code == LTU)
12442 {
12443 HOST_WIDE_INT tmp = ct;
12444 ct = cf;
12445 cf = tmp;
12446 compare_code = reverse_condition (compare_code);
12447 code = reverse_condition (code);
12448 }
12449 else
12450 {
12451 if (fpcmp)
12452 PUT_CODE (compare_op,
12453 reverse_condition_maybe_unordered
12454 (GET_CODE (compare_op)));
12455 else
12456 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12457 }
12458 diff = ct - cf;
12459
12460 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12461 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12462 tmp = gen_reg_rtx (mode);
12463
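/* Emit a 0/-1 mask from the carry flag (the "sbb dest,dest" of the
sequences sketched below): all ones when the carry is set, zero
otherwise, with no branch.  */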
12464 if (mode == DImode)
12465 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12466 else
12467 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12468 }
12469 else
12470 {
12471 if (code == GT || code == GE)
12472 code = reverse_condition (code);
12473 else
12474 {
12475 HOST_WIDE_INT tmp = ct;
12476 ct = cf;
12477 cf = tmp;
12478 diff = ct - cf;
12479 }
12480 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12481 ix86_compare_op1, VOIDmode, 0, -1);
12482 }
12483
12484 if (diff == 1)
12485 {
12486 /*
12487 * cmpl op0,op1
12488 * sbbl dest,dest
12489 * [addl dest, ct]
12490 *
12491 * Size 5 - 8.
12492 */
12493 if (ct)
12494 tmp = expand_simple_binop (mode, PLUS,
12495 tmp, GEN_INT (ct),
12496 copy_rtx (tmp), 1, OPTAB_DIRECT);
12497 }
12498 else if (cf == -1)
12499 {
12500 /*
12501 * cmpl op0,op1
12502 * sbbl dest,dest
12503 * orl $ct, dest
12504 *
12505 * Size 8.
12506 */
12507 tmp = expand_simple_binop (mode, IOR,
12508 tmp, GEN_INT (ct),
12509 copy_rtx (tmp), 1, OPTAB_DIRECT);
12510 }
12511 else if (diff == -1 && ct)
12512 {
12513 /*
12514 * cmpl op0,op1
12515 * sbbl dest,dest
12516 * notl dest
12517 * [addl dest, cf]
12518 *
12519 * Size 8 - 11.
12520 */
12521 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12522 if (cf)
12523 tmp = expand_simple_binop (mode, PLUS,
12524 copy_rtx (tmp), GEN_INT (cf),
12525 copy_rtx (tmp), 1, OPTAB_DIRECT);
12526 }
12527 else
12528 {
12529 /*
12530 * cmpl op0,op1
12531 * sbbl dest,dest
12532 * [notl dest]
12533 * andl cf - ct, dest
12534 * [addl dest, ct]
12535 *
12536 * Size 8 - 11.
12537 */
12538
12539 if (cf == 0)
12540 {
12541 cf = ct;
12542 ct = 0;
12543 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12544 }
12545
12546 tmp = expand_simple_binop (mode, AND,
12547 copy_rtx (tmp),
12548 gen_int_mode (cf - ct, mode),
12549 copy_rtx (tmp), 1, OPTAB_DIRECT);
12550 if (ct)
12551 tmp = expand_simple_binop (mode, PLUS,
12552 copy_rtx (tmp), GEN_INT (ct),
12553 copy_rtx (tmp), 1, OPTAB_DIRECT);
12554 }
12555
12556 if (!rtx_equal_p (tmp, out))
12557 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12558
12559 return 1; /* DONE */
12560 }
12561
12562 if (diff < 0)
12563 {
12564 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12565
12566 HOST_WIDE_INT tmp;
12567 tmp = ct, ct = cf, cf = tmp;
12568 diff = -diff;
12569
12570 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12571 {
12572 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12573
12574 /* We may be reversing an unordered compare to a normal compare, which
12575 is not valid in general (we may convert a non-trapping condition
12576 to a trapping one); however, on i386 we currently emit all
12577 comparisons unordered. */
12578 compare_code = reverse_condition_maybe_unordered (compare_code);
12579 code = reverse_condition_maybe_unordered (code);
12580 }
12581 else
12582 {
12583 compare_code = reverse_condition (compare_code);
12584 code = reverse_condition (code);
12585 }
12586 }
12587
12588 compare_code = UNKNOWN;
12589 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12590 && CONST_INT_P (ix86_compare_op1))
12591 {
12592 if (ix86_compare_op1 == const0_rtx
12593 && (code == LT || code == GE))
12594 compare_code = code;
12595 else if (ix86_compare_op1 == constm1_rtx)
12596 {
12597 if (code == LE)
12598 compare_code = LT;
12599 else if (code == GT)
12600 compare_code = GE;
12601 }
12602 }
12603
12604 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12605 if (compare_code != UNKNOWN
12606 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12607 && (cf == -1 || ct == -1))
12608 {
12609 /* If the lea code below could be used, only optimize
12610 if it results in a 2-insn sequence. */
12611
12612 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12613 || diff == 3 || diff == 5 || diff == 9)
12614 || (compare_code == LT && ct == -1)
12615 || (compare_code == GE && cf == -1))
12616 {
12617 /*
12618 * notl op1 (if necessary)
12619 * sarl $31, op1
12620 * orl cf, op1
12621 */
12622 if (ct != -1)
12623 {
12624 cf = ct;
12625 ct = -1;
12626 code = reverse_condition (code);
12627 }
12628
12629 out = emit_store_flag (out, code, ix86_compare_op0,
12630 ix86_compare_op1, VOIDmode, 0, -1);
12631
12632 out = expand_simple_binop (mode, IOR,
12633 out, GEN_INT (cf),
12634 out, 1, OPTAB_DIRECT);
12635 if (out != operands[0])
12636 emit_move_insn (operands[0], out);
12637
12638 return 1; /* DONE */
12639 }
12640 }
12641
12642
12643 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12644 || diff == 3 || diff == 5 || diff == 9)
12645 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12646 && (mode != DImode
12647 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12648 {
12649 /*
12650 * xorl dest,dest
12651 * cmpl op1,op2
12652 * setcc dest
12653 * lea cf(dest*(ct-cf)),dest
12654 *
12655 * Size 14.
12656 *
12657 * This also catches the degenerate setcc-only case.
12658 */
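/* The diff values accepted above map directly onto lea addressing: 1, 2,
4 and 8 are plain scale factors, while 3, 5 and 9 become base + base*2/4/8,
so dest * (ct - cf) + cf fits in a single lea.  */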
12659
12660 rtx tmp;
12661 int nops;
12662
12663 out = emit_store_flag (out, code, ix86_compare_op0,
12664 ix86_compare_op1, VOIDmode, 0, 1);
12665
12666 nops = 0;
12667 /* On x86_64 the lea instruction operates on Pmode, so we need
12668 to get the arithmetic done in the proper mode to match. */
12669 if (diff == 1)
12670 tmp = copy_rtx (out);
12671 else
12672 {
12673 rtx out1;
12674 out1 = copy_rtx (out);
12675 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12676 nops++;
12677 if (diff & 1)
12678 {
12679 tmp = gen_rtx_PLUS (mode, tmp, out1);
12680 nops++;
12681 }
12682 }
12683 if (cf != 0)
12684 {
12685 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12686 nops++;
12687 }
12688 if (!rtx_equal_p (tmp, out))
12689 {
12690 if (nops == 1)
12691 out = force_operand (tmp, copy_rtx (out));
12692 else
12693 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12694 }
12695 if (!rtx_equal_p (out, operands[0]))
12696 emit_move_insn (operands[0], copy_rtx (out));
12697
12698 return 1; /* DONE */
12699 }
12700
12701 /*
12702 * General case: Jumpful:
12703 * xorl dest,dest cmpl op1, op2
12704 * cmpl op1, op2 movl ct, dest
12705 * setcc dest jcc 1f
12706 * decl dest movl cf, dest
12707 * andl (cf-ct),dest 1:
12708 * addl ct,dest
12709 *
12710 * Size 20. Size 14.
12711 *
12712 * This is reasonably steep, but branch mispredict costs are
12713 * high on modern CPUs, so consider failing only if optimizing
12714 * for space.
12715 */
12716
12717 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12718 && BRANCH_COST >= 2)
12719 {
12720 if (cf == 0)
12721 {
12722 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12723
12724 cf = ct;
12725 ct = 0;
12726
12727 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12728 {
12729 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12730
12731 /* We may be reversing an unordered compare to a normal compare,
12732 which is not valid in general (we may convert a non-trapping
12733 condition to a trapping one); however, on i386 we currently
12734 emit all comparisons unordered. */
12735 code = reverse_condition_maybe_unordered (code);
12736 }
12737 else
12738 {
12739 code = reverse_condition (code);
12740 if (compare_code != UNKNOWN)
12741 compare_code = reverse_condition (compare_code);
12742 }
12743 }
12744
12745 if (compare_code != UNKNOWN)
12746 {
12747 /* notl op1 (if needed)
12748 sarl $31, op1
12749 andl (cf-ct), op1
12750 addl ct, op1
12751
12752 For x < 0 (resp. x <= -1) there will be no notl,
12753 so if possible swap the constants to get rid of the
12754 complement.
12755 True/false will be -1/0 while code below (store flag
12756 followed by decrement) is 0/-1, so the constants need
12757 to be exchanged once more. */
12758
12759 if (compare_code == GE || !cf)
12760 {
12761 code = reverse_condition (code);
12762 compare_code = LT;
12763 }
12764 else
12765 {
12766 HOST_WIDE_INT tmp = cf;
12767 cf = ct;
12768 ct = tmp;
12769 }
12770
12771 out = emit_store_flag (out, code, ix86_compare_op0,
12772 ix86_compare_op1, VOIDmode, 0, -1);
12773 }
12774 else
12775 {
12776 out = emit_store_flag (out, code, ix86_compare_op0,
12777 ix86_compare_op1, VOIDmode, 0, 1);
12778
12779 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12780 copy_rtx (out), 1, OPTAB_DIRECT);
12781 }
12782
12783 out = expand_simple_binop (mode, AND, copy_rtx (out),
12784 gen_int_mode (cf - ct, mode),
12785 copy_rtx (out), 1, OPTAB_DIRECT);
12786 if (ct)
12787 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12788 copy_rtx (out), 1, OPTAB_DIRECT);
12789 if (!rtx_equal_p (out, operands[0]))
12790 emit_move_insn (operands[0], copy_rtx (out));
12791
12792 return 1; /* DONE */
12793 }
12794 }
12795
12796 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12797 {
12798 /* Try a few things more with specific constants and a variable. */
12799
12800 optab op;
12801 rtx var, orig_out, out, tmp;
12802
12803 if (BRANCH_COST <= 2)
12804 return 0; /* FAIL */
12805
12806 /* If one of the two operands is an interesting constant (0 or -1), load that
12807 constant via a recursive call and mask the variable in with a logical operation. */
12808
12809 if (CONST_INT_P (operands[2]))
12810 {
12811 var = operands[3];
12812 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12813 operands[3] = constm1_rtx, op = and_optab;
12814 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12815 operands[3] = const0_rtx, op = ior_optab;
12816 else
12817 return 0; /* FAIL */
12818 }
12819 else if (CONST_INT_P (operands[3]))
12820 {
12821 var = operands[2];
12822 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12823 operands[2] = constm1_rtx, op = and_optab;
12824 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12825 operands[2] = const0_rtx, op = ior_optab;
12826 else
12827 return 0; /* FAIL */
12828 }
12829 else
12830 return 0; /* FAIL */
12831
12832 orig_out = operands[0];
12833 tmp = gen_reg_rtx (mode);
12834 operands[0] = tmp;
12835
12836 /* Recurse to get the constant loaded. */
12837 if (ix86_expand_int_movcc (operands) == 0)
12838 return 0; /* FAIL */
12839
12840 /* Mask in the interesting variable. */
12841 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12842 OPTAB_WIDEN);
12843 if (!rtx_equal_p (out, orig_out))
12844 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12845
12846 return 1; /* DONE */
12847 }
12848
12849 /*
12850 * For comparison with above,
12851 *
12852 * movl cf,dest
12853 * movl ct,tmp
12854 * cmpl op1,op2
12855 * cmovcc tmp,dest
12856 *
12857 * Size 15.
12858 */
12859
12860 if (! nonimmediate_operand (operands[2], mode))
12861 operands[2] = force_reg (mode, operands[2]);
12862 if (! nonimmediate_operand (operands[3], mode))
12863 operands[3] = force_reg (mode, operands[3]);
12864
12865 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12866 {
12867 rtx tmp = gen_reg_rtx (mode);
12868 emit_move_insn (tmp, operands[3]);
12869 operands[3] = tmp;
12870 }
12871 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12872 {
12873 rtx tmp = gen_reg_rtx (mode);
12874 emit_move_insn (tmp, operands[2]);
12875 operands[2] = tmp;
12876 }
12877
12878 if (! register_operand (operands[2], VOIDmode)
12879 && (mode == QImode
12880 || ! register_operand (operands[3], VOIDmode)))
12881 operands[2] = force_reg (mode, operands[2]);
12882
12883 if (mode == QImode
12884 && ! register_operand (operands[3], VOIDmode))
12885 operands[3] = force_reg (mode, operands[3]);
12886
12887 emit_insn (compare_seq);
12888 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12889 gen_rtx_IF_THEN_ELSE (mode,
12890 compare_op, operands[2],
12891 operands[3])));
12892 if (bypass_test)
12893 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12894 gen_rtx_IF_THEN_ELSE (mode,
12895 bypass_test,
12896 copy_rtx (operands[3]),
12897 copy_rtx (operands[0]))));
12898 if (second_test)
12899 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12900 gen_rtx_IF_THEN_ELSE (mode,
12901 second_test,
12902 copy_rtx (operands[2]),
12903 copy_rtx (operands[0]))));
12904
12905 return 1; /* DONE */
12906 }
12907
12908 /* Swap, force into registers, or otherwise massage the two operands
12909 to an sse comparison with a mask result. Thus we differ a bit from
12910 ix86_prepare_fp_compare_args which expects to produce a flags result.
12911
12912 The DEST operand exists to help determine whether to commute commutative
12913 operators. The POP0/POP1 operands are updated in place. The new
12914 comparison code is returned, or UNKNOWN if not implementable. */
12915
12916 static enum rtx_code
12917 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12918 rtx *pop0, rtx *pop1)
12919 {
12920 rtx tmp;
12921
12922 switch (code)
12923 {
12924 case LTGT:
12925 case UNEQ:
12926 /* We have no LTGT as an operator. We could implement it with
12927 NE & ORDERED, but this requires an extra temporary. It's
12928 not clear that it's worth it. */
12929 return UNKNOWN;
12930
12931 case LT:
12932 case LE:
12933 case UNGT:
12934 case UNGE:
12935 /* These are supported directly. */
12936 break;
12937
12938 case EQ:
12939 case NE:
12940 case UNORDERED:
12941 case ORDERED:
12942 /* For commutative operators, try to canonicalize the destination
12943 operand to be first in the comparison - this helps reload to
12944 avoid extra moves. */
12945 if (!dest || !rtx_equal_p (dest, *pop1))
12946 break;
12947 /* FALLTHRU */
12948
12949 case GE:
12950 case GT:
12951 case UNLE:
12952 case UNLT:
12953 /* These are not supported directly. Swap the comparison operands
12954 to transform into something that is supported. */
12955 tmp = *pop0;
12956 *pop0 = *pop1;
12957 *pop1 = tmp;
12958 code = swap_condition (code);
12959 break;
12960
12961 default:
12962 gcc_unreachable ();
12963 }
12964
12965 return code;
12966 }
12967
12968 /* Detect conditional moves that exactly match min/max operational
12969 semantics. Note that this is IEEE safe, as long as we don't
12970 interchange the operands.
12971
12972 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12973 and TRUE if the operation is successful and instructions are emitted. */
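
   An illustrative sketch of the pattern being matched (names are
   hypothetical):

     float min_like (float a, float b)
     {
       return a < b ? a : b;
     }

   This maps directly to minss, which is not commutative for NaN operands
   (the second source is returned when the comparison is unordered), so
   the operands must not be interchanged.  */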
12974
12975 static bool
12976 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12977 rtx cmp_op1, rtx if_true, rtx if_false)
12978 {
12979 enum machine_mode mode;
12980 bool is_min;
12981 rtx tmp;
12982
12983 if (code == LT)
12984 ;
12985 else if (code == UNGE)
12986 {
12987 tmp = if_true;
12988 if_true = if_false;
12989 if_false = tmp;
12990 }
12991 else
12992 return false;
12993
12994 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12995 is_min = true;
12996 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12997 is_min = false;
12998 else
12999 return false;
13000
13001 mode = GET_MODE (dest);
13002
13003 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13004 but MODE may be a vector mode and thus not appropriate. */
13005 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13006 {
13007 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13008 rtvec v;
13009
13010 if_true = force_reg (mode, if_true);
13011 v = gen_rtvec (2, if_true, if_false);
13012 tmp = gen_rtx_UNSPEC (mode, v, u);
13013 }
13014 else
13015 {
13016 code = is_min ? SMIN : SMAX;
13017 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13018 }
13019
13020 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13021 return true;
13022 }
13023
13024 /* Expand an sse vector comparison. Return the register with the result. */
13025
13026 static rtx
13027 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13028 rtx op_true, rtx op_false)
13029 {
13030 enum machine_mode mode = GET_MODE (dest);
13031 rtx x;
13032
13033 cmp_op0 = force_reg (mode, cmp_op0);
13034 if (!nonimmediate_operand (cmp_op1, mode))
13035 cmp_op1 = force_reg (mode, cmp_op1);
13036
13037 if (optimize
13038 || reg_overlap_mentioned_p (dest, op_true)
13039 || reg_overlap_mentioned_p (dest, op_false))
13040 dest = gen_reg_rtx (mode);
13041
13042 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13043 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13044
13045 return dest;
13046 }
13047
13048 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13049 operations. This is used for both scalar and vector conditional moves. */
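
/* A C-level sketch of the general case below (illustrative only), where
   CMP is an all-ones/all-zeros mask per element as produced by the SSE
   compare instructions:

     dest = (cmp & op_true) | (~cmp & op_false);  */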
13050
13051 static void
13052 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13053 {
13054 enum machine_mode mode = GET_MODE (dest);
13055 rtx t2, t3, x;
13056
13057 if (TARGET_SSE5)
13058 {
13059 rtx pcmov = gen_rtx_SET (mode, dest,
13060 gen_rtx_IF_THEN_ELSE (mode, cmp,
13061 op_true,
13062 op_false));
13063 emit_insn (pcmov);
13064 }
13065 else if (op_false == CONST0_RTX (mode))
13066 {
13067 op_true = force_reg (mode, op_true);
13068 x = gen_rtx_AND (mode, cmp, op_true);
13069 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13070 }
13071 else if (op_true == CONST0_RTX (mode))
13072 {
13073 op_false = force_reg (mode, op_false);
13074 x = gen_rtx_NOT (mode, cmp);
13075 x = gen_rtx_AND (mode, x, op_false);
13076 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13077 }
13078 else
13079 {
13080 op_true = force_reg (mode, op_true);
13081 op_false = force_reg (mode, op_false);
13082
13083 t2 = gen_reg_rtx (mode);
13084 if (optimize)
13085 t3 = gen_reg_rtx (mode);
13086 else
13087 t3 = dest;
13088
13089 x = gen_rtx_AND (mode, op_true, cmp);
13090 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13091
13092 x = gen_rtx_NOT (mode, cmp);
13093 x = gen_rtx_AND (mode, x, op_false);
13094 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13095
13096 x = gen_rtx_IOR (mode, t3, t2);
13097 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13098 }
13099 }
13100
13101 /* Expand a floating-point conditional move. Return true if successful. */
13102
13103 int
13104 ix86_expand_fp_movcc (rtx operands[])
13105 {
13106 enum machine_mode mode = GET_MODE (operands[0]);
13107 enum rtx_code code = GET_CODE (operands[1]);
13108 rtx tmp, compare_op, second_test, bypass_test;
13109
13110 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13111 {
13112 enum machine_mode cmode;
13113
13114       /* Since we have no cmove for SSE registers, don't force bad register
13115 	 allocation just to gain access to it.  Deny movcc when the
13116 	 comparison mode doesn't match the move mode. */
13117 cmode = GET_MODE (ix86_compare_op0);
13118 if (cmode == VOIDmode)
13119 cmode = GET_MODE (ix86_compare_op1);
13120 if (cmode != mode)
13121 return 0;
13122
13123 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13124 &ix86_compare_op0,
13125 &ix86_compare_op1);
13126 if (code == UNKNOWN)
13127 return 0;
13128
13129 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13130 ix86_compare_op1, operands[2],
13131 operands[3]))
13132 return 1;
13133
13134 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13135 ix86_compare_op1, operands[2], operands[3]);
13136 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13137 return 1;
13138 }
13139
13140 /* The floating point conditional move instructions don't directly
13141 support conditions resulting from a signed integer comparison. */
13142
13143 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13144
13148 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13149 {
13150 gcc_assert (!second_test && !bypass_test);
13151 tmp = gen_reg_rtx (QImode);
13152 ix86_expand_setcc (code, tmp);
13153 code = NE;
13154 ix86_compare_op0 = tmp;
13155 ix86_compare_op1 = const0_rtx;
13156 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13157 }
13158 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13159 {
13160 tmp = gen_reg_rtx (mode);
13161 emit_move_insn (tmp, operands[3]);
13162 operands[3] = tmp;
13163 }
13164 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13165 {
13166 tmp = gen_reg_rtx (mode);
13167 emit_move_insn (tmp, operands[2]);
13168 operands[2] = tmp;
13169 }
13170
13171 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13172 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13173 operands[2], operands[3])));
13174 if (bypass_test)
13175 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13176 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13177 operands[3], operands[0])));
13178 if (second_test)
13179 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13180 gen_rtx_IF_THEN_ELSE (mode, second_test,
13181 operands[2], operands[0])));
13182
13183 return 1;
13184 }
13185
13186 /* Expand a floating-point vector conditional move; a vcond operation
13187 rather than a movcc operation. */
13188
13189 bool
13190 ix86_expand_fp_vcond (rtx operands[])
13191 {
13192 enum rtx_code code = GET_CODE (operands[3]);
13193 rtx cmp;
13194
13195 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13196 &operands[4], &operands[5]);
13197 if (code == UNKNOWN)
13198 return false;
13199
13200 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13201 operands[5], operands[1], operands[2]))
13202 return true;
13203
13204 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13205 operands[1], operands[2]);
13206 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13207 return true;
13208 }
13209
13210 /* Expand a signed/unsigned integral vector conditional move. */
13211
13212 bool
13213 ix86_expand_int_vcond (rtx operands[])
13214 {
13215 enum machine_mode mode = GET_MODE (operands[0]);
13216 enum rtx_code code = GET_CODE (operands[3]);
13217 bool negate = false;
13218 rtx x, cop0, cop1;
13219
13220 cop0 = operands[4];
13221 cop1 = operands[5];
13222
13223 /* Canonicalize the comparison to EQ, GT, GTU. */
13224 switch (code)
13225 {
13226 case EQ:
13227 case GT:
13228 case GTU:
13229 break;
13230
13231 case NE:
13232 case LE:
13233 case LEU:
13234 code = reverse_condition (code);
13235 negate = true;
13236 break;
13237
13238 case GE:
13239 case GEU:
13240 code = reverse_condition (code);
13241 negate = true;
13242 /* FALLTHRU */
13243
13244 case LT:
13245 case LTU:
13246 code = swap_condition (code);
13247 x = cop0, cop0 = cop1, cop1 = x;
13248 break;
13249
13250 default:
13251 gcc_unreachable ();
13252 }
13253
13254 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13255 if (mode == V2DImode)
13256 {
13257 switch (code)
13258 {
13259 case EQ:
13260 /* SSE4.1 supports EQ. */
13261 if (!TARGET_SSE4_1)
13262 return false;
13263 break;
13264
13265 case GT:
13266 case GTU:
13267 /* SSE4.2 supports GT/GTU. */
13268 if (!TARGET_SSE4_2)
13269 return false;
13270 break;
13271
13272 default:
13273 gcc_unreachable ();
13274 }
13275 }
13276
13277 /* Unsigned parallel compare is not supported by the hardware. Play some
13278 tricks to turn this into a signed comparison against 0. */
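  /* An illustrative scalar sketch of the byte/word case handled below,
     based on unsigned saturating subtraction:

       a >u b   exactly when   sat_sub (a, b) != 0
       where sat_sub (a, b) = (a > b ? a - b : 0)

     so the unsigned compare becomes an equality test against zero, with
     NEGATE toggled to swap the selected arms.  */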
13279 if (code == GTU)
13280 {
13281 cop0 = force_reg (mode, cop0);
13282
13283 switch (mode)
13284 {
13285 case V4SImode:
13286 case V2DImode:
13287 {
13288 rtx t1, t2, mask;
13289
13290 /* Perform a parallel modulo subtraction. */
13291 t1 = gen_reg_rtx (mode);
13292 emit_insn ((mode == V4SImode
13293 ? gen_subv4si3
13294 : gen_subv2di3) (t1, cop0, cop1));
13295
13296 /* Extract the original sign bit of op0. */
13297 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13298 true, false);
13299 t2 = gen_reg_rtx (mode);
13300 emit_insn ((mode == V4SImode
13301 ? gen_andv4si3
13302 : gen_andv2di3) (t2, cop0, mask));
13303
13304 /* XOR it back into the result of the subtraction. This results
13305 in the sign bit set iff we saw unsigned underflow. */
13306 x = gen_reg_rtx (mode);
13307 emit_insn ((mode == V4SImode
13308 ? gen_xorv4si3
13309 : gen_xorv2di3) (x, t1, t2));
13310
13311 code = GT;
13312 }
13313 break;
13314
13315 case V16QImode:
13316 case V8HImode:
13317 /* Perform a parallel unsigned saturating subtraction. */
13318 x = gen_reg_rtx (mode);
13319 emit_insn (gen_rtx_SET (VOIDmode, x,
13320 gen_rtx_US_MINUS (mode, cop0, cop1)));
13321
13322 code = EQ;
13323 negate = !negate;
13324 break;
13325
13326 default:
13327 gcc_unreachable ();
13328 }
13329
13330 cop0 = x;
13331 cop1 = CONST0_RTX (mode);
13332 }
13333
13334 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13335 operands[1+negate], operands[2-negate]);
13336
13337 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13338 operands[2-negate]);
13339 return true;
13340 }
13341
13342 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13343 true if we should do zero extension, else sign extension. HIGH_P is
13344 true if we want the N/2 high elements, else the low elements. */
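
/* For illustration, zero-extending the low half of a V16QI vector is an
   interleave with a zero vector:

     { a0, a1, ..., a15 }  interleave-low  { 0, 0, ..., 0 }
       ==> { a0, 0, a1, 0, ..., a7, 0 }

   which, read as V8HI on a little-endian target, is the zero extension
   of a0..a7.  For signed input the zero vector is replaced by a
   per-element sign mask computed with a GT comparison against zero.  */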
13345
13346 void
13347 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13348 {
13349 enum machine_mode imode = GET_MODE (operands[1]);
13350 rtx (*unpack)(rtx, rtx, rtx);
13351 rtx se, dest;
13352
13353 switch (imode)
13354 {
13355 case V16QImode:
13356 if (high_p)
13357 unpack = gen_vec_interleave_highv16qi;
13358 else
13359 unpack = gen_vec_interleave_lowv16qi;
13360 break;
13361 case V8HImode:
13362 if (high_p)
13363 unpack = gen_vec_interleave_highv8hi;
13364 else
13365 unpack = gen_vec_interleave_lowv8hi;
13366 break;
13367 case V4SImode:
13368 if (high_p)
13369 unpack = gen_vec_interleave_highv4si;
13370 else
13371 unpack = gen_vec_interleave_lowv4si;
13372 break;
13373 default:
13374 gcc_unreachable ();
13375 }
13376
13377 dest = gen_lowpart (imode, operands[0]);
13378
13379 if (unsigned_p)
13380 se = force_reg (imode, CONST0_RTX (imode));
13381 else
13382 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13383 operands[1], pc_rtx, pc_rtx);
13384
13385 emit_insn (unpack (dest, operands[1], se));
13386 }
13387
13388 /* This function performs the same task as ix86_expand_sse_unpack,
13389 but with SSE4.1 instructions. */
13390
13391 void
13392 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13393 {
13394 enum machine_mode imode = GET_MODE (operands[1]);
13395 rtx (*unpack)(rtx, rtx);
13396 rtx src, dest;
13397
13398 switch (imode)
13399 {
13400 case V16QImode:
13401 if (unsigned_p)
13402 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13403 else
13404 unpack = gen_sse4_1_extendv8qiv8hi2;
13405 break;
13406 case V8HImode:
13407 if (unsigned_p)
13408 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13409 else
13410 unpack = gen_sse4_1_extendv4hiv4si2;
13411 break;
13412 case V4SImode:
13413 if (unsigned_p)
13414 unpack = gen_sse4_1_zero_extendv2siv2di2;
13415 else
13416 unpack = gen_sse4_1_extendv2siv2di2;
13417 break;
13418 default:
13419 gcc_unreachable ();
13420 }
13421
13422 dest = operands[0];
13423 if (high_p)
13424 {
13425 /* Shift higher 8 bytes to lower 8 bytes. */
13426 src = gen_reg_rtx (imode);
13427 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13428 gen_lowpart (TImode, operands[1]),
13429 GEN_INT (64)));
13430 }
13431 else
13432 src = operands[1];
13433
13434 emit_insn (unpack (dest, src));
13435 }
13436
13437 /* This function performs the same task as ix86_expand_sse_unpack,
13438    but with SSE5 instructions. */
13439
13440 #define PPERM_SRC 0x00 /* copy source */
13441 #define PPERM_INVERT 0x20 /* invert source */
13442 #define PPERM_REVERSE 0x40 /* bit reverse source */
13443 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13444 #define PPERM_ZERO 0x80 /* all 0's */
13445 #define PPERM_ONES 0xa0 /* all 1's */
13446 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13447 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13448
13449 #define PPERM_SRC1 0x00 /* use first source byte */
13450 #define PPERM_SRC2 0x10 /* use second source byte */
13451
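/* Each selector byte used below is composed as
   (operation | source select | byte index); for example the value
   PPERM_SIGN | PPERM_SRC2 | 3 requests that a destination byte be filled
   with the propagated sign bit of byte 3 of the second source
   (illustrative reading of the encoding above).  */
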
13452 void
13453 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13454 {
13455 enum machine_mode imode = GET_MODE (operands[1]);
13456 int pperm_bytes[16];
13457 int i;
13458 int h = (high_p) ? 8 : 0;
13459 int h2;
13460 int sign_extend;
13461 rtvec v = rtvec_alloc (16);
13462 rtvec vs;
13463 rtx x, p;
13464 rtx op0 = operands[0], op1 = operands[1];
13465
13466 switch (imode)
13467 {
13468 case V16QImode:
13469 vs = rtvec_alloc (8);
13470 h2 = (high_p) ? 8 : 0;
13471 for (i = 0; i < 8; i++)
13472 {
13473 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13474 pperm_bytes[2*i+1] = ((unsigned_p)
13475 ? PPERM_ZERO
13476 : PPERM_SIGN | PPERM_SRC2 | i | h);
13477 }
13478
13479 for (i = 0; i < 16; i++)
13480 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13481
13482 for (i = 0; i < 8; i++)
13483 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13484
13485 p = gen_rtx_PARALLEL (VOIDmode, vs);
13486 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13487 if (unsigned_p)
13488 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13489 else
13490 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13491 break;
13492
13493 case V8HImode:
13494 vs = rtvec_alloc (4);
13495 h2 = (high_p) ? 4 : 0;
13496 for (i = 0; i < 4; i++)
13497 {
13498 sign_extend = ((unsigned_p)
13499 ? PPERM_ZERO
13500 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13501 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13502 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13503 pperm_bytes[4*i+2] = sign_extend;
13504 pperm_bytes[4*i+3] = sign_extend;
13505 }
13506
13507 for (i = 0; i < 16; i++)
13508 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13509
13510 for (i = 0; i < 4; i++)
13511 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13512
13513 p = gen_rtx_PARALLEL (VOIDmode, vs);
13514 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13515 if (unsigned_p)
13516 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13517 else
13518 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13519 break;
13520
13521 case V4SImode:
13522 vs = rtvec_alloc (2);
13523 h2 = (high_p) ? 2 : 0;
13524 for (i = 0; i < 2; i++)
13525 {
13526 sign_extend = ((unsigned_p)
13527 ? PPERM_ZERO
13528 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13529 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13530 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13531 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13532 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13533 pperm_bytes[8*i+4] = sign_extend;
13534 pperm_bytes[8*i+5] = sign_extend;
13535 pperm_bytes[8*i+6] = sign_extend;
13536 pperm_bytes[8*i+7] = sign_extend;
13537 }
13538
13539 for (i = 0; i < 16; i++)
13540 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13541
13542 for (i = 0; i < 4; i++)
13543 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13544
13545 p = gen_rtx_PARALLEL (VOIDmode, vs);
13546 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13547 if (unsigned_p)
13548 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13549 else
13550 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13551 break;
13552
13553 default:
13554 gcc_unreachable ();
13555 }
13556
13557 return;
13558 }
13559
13560 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13561 next narrower integer vector type */
13562 void
13563 ix86_expand_sse5_pack (rtx operands[3])
13564 {
13565 enum machine_mode imode = GET_MODE (operands[0]);
13566 int pperm_bytes[16];
13567 int i;
13568 rtvec v = rtvec_alloc (16);
13569 rtx x;
13570 rtx op0 = operands[0];
13571 rtx op1 = operands[1];
13572 rtx op2 = operands[2];
13573
13574 switch (imode)
13575 {
13576 case V16QImode:
13577 for (i = 0; i < 8; i++)
13578 {
13579 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13580 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13581 }
13582
13583 for (i = 0; i < 16; i++)
13584 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13585
13586 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13587 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13588 break;
13589
13590 case V8HImode:
13591 for (i = 0; i < 4; i++)
13592 {
13593 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13594 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13595 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13596 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13597 }
13598
13599 for (i = 0; i < 16; i++)
13600 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13601
13602 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13603 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13604 break;
13605
13606 case V4SImode:
13607 for (i = 0; i < 2; i++)
13608 {
13609 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13610 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13611 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13612 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13613 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13614 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13615 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13616 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13617 }
13618
13619 for (i = 0; i < 16; i++)
13620 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13621
13622 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13623 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13624 break;
13625
13626 default:
13627 gcc_unreachable ();
13628 }
13629
13630 return;
13631 }
13632
13633 /* Expand conditional increment or decrement using adc/sbb instructions.
13634    The default case using setcc followed by a conditional move can be
13635    done by generic code. */
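
/* For illustration, a conditional increment such as

     if (a < b) x++;        with a, b unsigned

   can be emitted as

     cmpl  %ebx, %eax       # carry set iff a < b (unsigned)
     adcl  $0, %ecx         # x += carry

   with the analogous sbb form covering decrement and the inverted
   conditions.  */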
13636 int
13637 ix86_expand_int_addcc (rtx operands[])
13638 {
13639 enum rtx_code code = GET_CODE (operands[1]);
13640 rtx compare_op;
13641 rtx val = const0_rtx;
13642 bool fpcmp = false;
13643 enum machine_mode mode = GET_MODE (operands[0]);
13644
13645 if (operands[3] != const1_rtx
13646 && operands[3] != constm1_rtx)
13647 return 0;
13648 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13649 ix86_compare_op1, &compare_op))
13650 return 0;
13651 code = GET_CODE (compare_op);
13652
13653 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13654 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13655 {
13656 fpcmp = true;
13657 code = ix86_fp_compare_code_to_integer (code);
13658 }
13659
13660 if (code != LTU)
13661 {
13662 val = constm1_rtx;
13663 if (fpcmp)
13664 PUT_CODE (compare_op,
13665 reverse_condition_maybe_unordered
13666 (GET_CODE (compare_op)));
13667 else
13668 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13669 }
13670 PUT_MODE (compare_op, mode);
13671
13672 /* Construct either adc or sbb insn. */
13673 if ((code == LTU) == (operands[3] == constm1_rtx))
13674 {
13675 switch (GET_MODE (operands[0]))
13676 {
13677 case QImode:
13678 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13679 break;
13680 case HImode:
13681 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13682 break;
13683 case SImode:
13684 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13685 break;
13686 case DImode:
13687 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13688 break;
13689 default:
13690 gcc_unreachable ();
13691 }
13692 }
13693 else
13694 {
13695 switch (GET_MODE (operands[0]))
13696 {
13697 case QImode:
13698 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13699 break;
13700 case HImode:
13701 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13702 break;
13703 case SImode:
13704 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13705 break;
13706 case DImode:
13707 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13708 break;
13709 default:
13710 gcc_unreachable ();
13711 }
13712 }
13713 return 1; /* DONE */
13714 }
13715
13716
13717 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
13718    works for floating-point parameters and non-offsettable memories.
13719    For pushes, it returns just stack offsets; the values will be saved
13720    in the right order.  At most three parts are generated. */
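
/* For example (illustrative), on a 32-bit target the DFmode constant 1.0
   splits into the two SImode parts { 0x00000000, 0x3ff00000 } (low word
   first), and an XFmode value yields three SImode parts.  */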
13721
13722 static int
13723 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13724 {
13725 int size;
13726
13727 if (!TARGET_64BIT)
13728 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13729 else
13730 size = (GET_MODE_SIZE (mode) + 4) / 8;
13731
13732 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13733 gcc_assert (size >= 2 && size <= 3);
13734
13735   /* Optimize constant pool references into immediates.  This is used by fp
13736      moves, which force all constants to memory to allow combining. */
13737 if (MEM_P (operand) && MEM_READONLY_P (operand))
13738 {
13739 rtx tmp = maybe_get_pool_constant (operand);
13740 if (tmp)
13741 operand = tmp;
13742 }
13743
13744 if (MEM_P (operand) && !offsettable_memref_p (operand))
13745 {
13746       /* The only non-offsettable memories we handle are pushes. */
13747 int ok = push_operand (operand, VOIDmode);
13748
13749 gcc_assert (ok);
13750
13751 operand = copy_rtx (operand);
13752 PUT_MODE (operand, Pmode);
13753 parts[0] = parts[1] = parts[2] = operand;
13754 return size;
13755 }
13756
13757 if (GET_CODE (operand) == CONST_VECTOR)
13758 {
13759 enum machine_mode imode = int_mode_for_mode (mode);
13760 /* Caution: if we looked through a constant pool memory above,
13761 the operand may actually have a different mode now. That's
13762 ok, since we want to pun this all the way back to an integer. */
13763 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13764 gcc_assert (operand != NULL);
13765 mode = imode;
13766 }
13767
13768 if (!TARGET_64BIT)
13769 {
13770 if (mode == DImode)
13771 split_di (&operand, 1, &parts[0], &parts[1]);
13772 else
13773 {
13774 if (REG_P (operand))
13775 {
13776 gcc_assert (reload_completed);
13777 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13778 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13779 if (size == 3)
13780 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13781 }
13782 else if (offsettable_memref_p (operand))
13783 {
13784 operand = adjust_address (operand, SImode, 0);
13785 parts[0] = operand;
13786 parts[1] = adjust_address (operand, SImode, 4);
13787 if (size == 3)
13788 parts[2] = adjust_address (operand, SImode, 8);
13789 }
13790 else if (GET_CODE (operand) == CONST_DOUBLE)
13791 {
13792 REAL_VALUE_TYPE r;
13793 long l[4];
13794
13795 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13796 switch (mode)
13797 {
13798 case XFmode:
13799 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13800 parts[2] = gen_int_mode (l[2], SImode);
13801 break;
13802 case DFmode:
13803 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13804 break;
13805 default:
13806 gcc_unreachable ();
13807 }
13808 parts[1] = gen_int_mode (l[1], SImode);
13809 parts[0] = gen_int_mode (l[0], SImode);
13810 }
13811 else
13812 gcc_unreachable ();
13813 }
13814 }
13815 else
13816 {
13817 if (mode == TImode)
13818 split_ti (&operand, 1, &parts[0], &parts[1]);
13819 if (mode == XFmode || mode == TFmode)
13820 {
13821 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13822 if (REG_P (operand))
13823 {
13824 gcc_assert (reload_completed);
13825 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13826 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13827 }
13828 else if (offsettable_memref_p (operand))
13829 {
13830 operand = adjust_address (operand, DImode, 0);
13831 parts[0] = operand;
13832 parts[1] = adjust_address (operand, upper_mode, 8);
13833 }
13834 else if (GET_CODE (operand) == CONST_DOUBLE)
13835 {
13836 REAL_VALUE_TYPE r;
13837 long l[4];
13838
13839 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13840 real_to_target (l, &r, mode);
13841
13842 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13843 if (HOST_BITS_PER_WIDE_INT >= 64)
13844 parts[0]
13845 = gen_int_mode
13846 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13847 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13848 DImode);
13849 else
13850 parts[0] = immed_double_const (l[0], l[1], DImode);
13851
13852 if (upper_mode == SImode)
13853 parts[1] = gen_int_mode (l[2], SImode);
13854 else if (HOST_BITS_PER_WIDE_INT >= 64)
13855 parts[1]
13856 = gen_int_mode
13857 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13858 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13859 DImode);
13860 else
13861 parts[1] = immed_double_const (l[2], l[3], DImode);
13862 }
13863 else
13864 gcc_unreachable ();
13865 }
13866 }
13867
13868 return size;
13869 }
13870
13871 /* Emit insns to perform a move or push of DI, DF, and XF values.
13872    This function emits all required insns itself.  Operands 2-4 receive
13873    the destination parts in the correct order; operands 5-7 receive the
13874    corresponding source parts. */
13875
13876 void
13877 ix86_split_long_move (rtx operands[])
13878 {
13879 rtx part[2][3];
13880 int nparts;
13881 int push = 0;
13882 int collisions = 0;
13883 enum machine_mode mode = GET_MODE (operands[0]);
13884
13885   /* The DFmode expanders may ask us to move a double.
13886      For a 64-bit target this is a single move.  By hiding that fact
13887      here we simplify the i386.md splitters. */
13888 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13889 {
13890       /* Optimize constant pool references into immediates.  This is used by
13891 	 fp moves, which force all constants to memory to allow combining. */
13892
13893 if (MEM_P (operands[1])
13894 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13895 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13896 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13897 if (push_operand (operands[0], VOIDmode))
13898 {
13899 operands[0] = copy_rtx (operands[0]);
13900 PUT_MODE (operands[0], Pmode);
13901 }
13902 else
13903 operands[0] = gen_lowpart (DImode, operands[0]);
13904 operands[1] = gen_lowpart (DImode, operands[1]);
13905 emit_move_insn (operands[0], operands[1]);
13906 return;
13907 }
13908
13909 /* The only non-offsettable memory we handle is push. */
13910 if (push_operand (operands[0], VOIDmode))
13911 push = 1;
13912 else
13913 gcc_assert (!MEM_P (operands[0])
13914 || offsettable_memref_p (operands[0]));
13915
13916 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13917 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13918
13919   /* When emitting a push, take care of source operands on the stack. */
13920 if (push && MEM_P (operands[1])
13921 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13922 {
13923 if (nparts == 3)
13924 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13925 XEXP (part[1][2], 0));
13926 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13927 XEXP (part[1][1], 0));
13928 }
13929
13930   /* We need to do the copy in the right order in case an address register
13931      of the source overlaps the destination. */
13932 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13933 {
13934 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13935 collisions++;
13936 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13937 collisions++;
13938 if (nparts == 3
13939 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13940 collisions++;
13941
13942 /* Collision in the middle part can be handled by reordering. */
13943 if (collisions == 1 && nparts == 3
13944 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13945 {
13946 rtx tmp;
13947 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13948 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13949 }
13950
13951 /* If there are more collisions, we can't handle it by reordering.
13952 Do an lea to the last part and use only one colliding move. */
13953 else if (collisions > 1)
13954 {
13955 rtx base;
13956
13957 collisions = 1;
13958
13959 base = part[0][nparts - 1];
13960
13961 /* Handle the case when the last part isn't valid for lea.
13962 Happens in 64-bit mode storing the 12-byte XFmode. */
13963 if (GET_MODE (base) != Pmode)
13964 base = gen_rtx_REG (Pmode, REGNO (base));
13965
13966 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13967 part[1][0] = replace_equiv_address (part[1][0], base);
13968 part[1][1] = replace_equiv_address (part[1][1],
13969 plus_constant (base, UNITS_PER_WORD));
13970 if (nparts == 3)
13971 part[1][2] = replace_equiv_address (part[1][2],
13972 plus_constant (base, 8));
13973 }
13974 }
13975
13976 if (push)
13977 {
13978 if (!TARGET_64BIT)
13979 {
13980 if (nparts == 3)
13981 {
13982 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13983 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13984 emit_move_insn (part[0][2], part[1][2]);
13985 }
13986 }
13987 else
13988 {
13989 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
13990 	     register, that is OK - we will just use the larger counterpart.  We also
13991 	     retype memory - these cases come from an attempt to avoid the REX prefix
13992 	     when moving the second half of a TFmode value. */
13993 if (GET_MODE (part[1][1]) == SImode)
13994 {
13995 switch (GET_CODE (part[1][1]))
13996 {
13997 case MEM:
13998 part[1][1] = adjust_address (part[1][1], DImode, 0);
13999 break;
14000
14001 case REG:
14002 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14003 break;
14004
14005 default:
14006 gcc_unreachable ();
14007 }
14008
14009 if (GET_MODE (part[1][0]) == SImode)
14010 part[1][0] = part[1][1];
14011 }
14012 }
14013 emit_move_insn (part[0][1], part[1][1]);
14014 emit_move_insn (part[0][0], part[1][0]);
14015 return;
14016 }
14017
14018   /* Choose the correct order so as not to overwrite the source before it is copied. */
14019 if ((REG_P (part[0][0])
14020 && REG_P (part[1][1])
14021 && (REGNO (part[0][0]) == REGNO (part[1][1])
14022 || (nparts == 3
14023 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14024 || (collisions > 0
14025 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14026 {
14027 if (nparts == 3)
14028 {
14029 operands[2] = part[0][2];
14030 operands[3] = part[0][1];
14031 operands[4] = part[0][0];
14032 operands[5] = part[1][2];
14033 operands[6] = part[1][1];
14034 operands[7] = part[1][0];
14035 }
14036 else
14037 {
14038 operands[2] = part[0][1];
14039 operands[3] = part[0][0];
14040 operands[5] = part[1][1];
14041 operands[6] = part[1][0];
14042 }
14043 }
14044 else
14045 {
14046 if (nparts == 3)
14047 {
14048 operands[2] = part[0][0];
14049 operands[3] = part[0][1];
14050 operands[4] = part[0][2];
14051 operands[5] = part[1][0];
14052 operands[6] = part[1][1];
14053 operands[7] = part[1][2];
14054 }
14055 else
14056 {
14057 operands[2] = part[0][0];
14058 operands[3] = part[0][1];
14059 operands[5] = part[1][0];
14060 operands[6] = part[1][1];
14061 }
14062 }
14063
14064 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14065 if (optimize_size)
14066 {
14067 if (CONST_INT_P (operands[5])
14068 && operands[5] != const0_rtx
14069 && REG_P (operands[2]))
14070 {
14071 if (CONST_INT_P (operands[6])
14072 && INTVAL (operands[6]) == INTVAL (operands[5]))
14073 operands[6] = operands[2];
14074
14075 if (nparts == 3
14076 && CONST_INT_P (operands[7])
14077 && INTVAL (operands[7]) == INTVAL (operands[5]))
14078 operands[7] = operands[2];
14079 }
14080
14081 if (nparts == 3
14082 && CONST_INT_P (operands[6])
14083 && operands[6] != const0_rtx
14084 && REG_P (operands[3])
14085 && CONST_INT_P (operands[7])
14086 && INTVAL (operands[7]) == INTVAL (operands[6]))
14087 operands[7] = operands[3];
14088 }
14089
14090 emit_move_insn (operands[2], operands[5]);
14091 emit_move_insn (operands[3], operands[6]);
14092 if (nparts == 3)
14093 emit_move_insn (operands[4], operands[7]);
14094
14095 return;
14096 }
14097
14098 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14099 left shift by a constant, either using a single shift or
14100 a sequence of add instructions. */
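
/* E.g. (illustrative) a left shift by 2 may be emitted as two
   self-additions when that is cheaper than a single shift:

     addl %eax, %eax
     addl %eax, %eax        # %eax <<= 2

   the choice depends on ix86_cost and on optimize_size.  */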
14101
14102 static void
14103 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14104 {
14105 if (count == 1)
14106 {
14107 emit_insn ((mode == DImode
14108 ? gen_addsi3
14109 : gen_adddi3) (operand, operand, operand));
14110 }
14111 else if (!optimize_size
14112 && count * ix86_cost->add <= ix86_cost->shift_const)
14113 {
14114 int i;
14115 for (i=0; i<count; i++)
14116 {
14117 emit_insn ((mode == DImode
14118 ? gen_addsi3
14119 : gen_adddi3) (operand, operand, operand));
14120 }
14121 }
14122 else
14123 emit_insn ((mode == DImode
14124 ? gen_ashlsi3
14125 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14126 }
14127
14128 void
14129 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14130 {
14131 rtx low[2], high[2];
14132 int count;
14133 const int single_width = mode == DImode ? 32 : 64;
14134
14135 if (CONST_INT_P (operands[2]))
14136 {
14137 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14138 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14139
14140 if (count >= single_width)
14141 {
14142 emit_move_insn (high[0], low[1]);
14143 emit_move_insn (low[0], const0_rtx);
14144
14145 if (count > single_width)
14146 ix86_expand_ashl_const (high[0], count - single_width, mode);
14147 }
14148 else
14149 {
14150 if (!rtx_equal_p (operands[0], operands[1]))
14151 emit_move_insn (operands[0], operands[1]);
14152 emit_insn ((mode == DImode
14153 ? gen_x86_shld_1
14154 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14155 ix86_expand_ashl_const (low[0], count, mode);
14156 }
14157 return;
14158 }
14159
14160 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14161
14162 if (operands[1] == const1_rtx)
14163 {
14164       /* Assuming we've chosen QImode-capable registers, 1 << N
14165 	 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14166 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14167 {
14168 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14169
14170 ix86_expand_clear (low[0]);
14171 ix86_expand_clear (high[0]);
14172 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14173
14174 d = gen_lowpart (QImode, low[0]);
14175 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14176 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14177 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14178
14179 d = gen_lowpart (QImode, high[0]);
14180 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14181 s = gen_rtx_NE (QImode, flags, const0_rtx);
14182 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14183 }
14184
14185 /* Otherwise, we can get the same results by manually performing
14186 a bit extract operation on bit 5/6, and then performing the two
14187 shifts. The two methods of getting 0/1 into low/high are exactly
14188 the same size. Avoiding the shift in the bit extract case helps
14189 pentium4 a bit; no one else seems to care much either way. */
14190 else
14191 {
14192 rtx x;
14193
14194 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14195 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14196 else
14197 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14198 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14199
14200 emit_insn ((mode == DImode
14201 ? gen_lshrsi3
14202 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14203 emit_insn ((mode == DImode
14204 ? gen_andsi3
14205 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14206 emit_move_insn (low[0], high[0]);
14207 emit_insn ((mode == DImode
14208 ? gen_xorsi3
14209 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14210 }
14211
14212 emit_insn ((mode == DImode
14213 ? gen_ashlsi3
14214 : gen_ashldi3) (low[0], low[0], operands[2]));
14215 emit_insn ((mode == DImode
14216 ? gen_ashlsi3
14217 : gen_ashldi3) (high[0], high[0], operands[2]));
14218 return;
14219 }
14220
14221 if (operands[1] == constm1_rtx)
14222 {
14223 /* For -1 << N, we can avoid the shld instruction, because we
14224 know that we're shifting 0...31/63 ones into a -1. */
14225 emit_move_insn (low[0], constm1_rtx);
14226 if (optimize_size)
14227 emit_move_insn (high[0], low[0]);
14228 else
14229 emit_move_insn (high[0], constm1_rtx);
14230 }
14231 else
14232 {
14233 if (!rtx_equal_p (operands[0], operands[1]))
14234 emit_move_insn (operands[0], operands[1]);
14235
14236 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14237 emit_insn ((mode == DImode
14238 ? gen_x86_shld_1
14239 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14240 }
14241
14242 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14243
14244 if (TARGET_CMOVE && scratch)
14245 {
14246 ix86_expand_clear (scratch);
14247 emit_insn ((mode == DImode
14248 ? gen_x86_shift_adj_1
14249 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14250 }
14251 else
14252 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14253 }
14254
14255 void
14256 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14257 {
14258 rtx low[2], high[2];
14259 int count;
14260 const int single_width = mode == DImode ? 32 : 64;
14261
14262 if (CONST_INT_P (operands[2]))
14263 {
14264 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14265 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14266
14267 if (count == single_width * 2 - 1)
14268 {
14269 emit_move_insn (high[0], high[1]);
14270 emit_insn ((mode == DImode
14271 ? gen_ashrsi3
14272 : gen_ashrdi3) (high[0], high[0],
14273 GEN_INT (single_width - 1)));
14274 emit_move_insn (low[0], high[0]);
14275
14276 }
14277 else if (count >= single_width)
14278 {
14279 emit_move_insn (low[0], high[1]);
14280 emit_move_insn (high[0], low[0]);
14281 emit_insn ((mode == DImode
14282 ? gen_ashrsi3
14283 : gen_ashrdi3) (high[0], high[0],
14284 GEN_INT (single_width - 1)));
14285 if (count > single_width)
14286 emit_insn ((mode == DImode
14287 ? gen_ashrsi3
14288 : gen_ashrdi3) (low[0], low[0],
14289 GEN_INT (count - single_width)));
14290 }
14291 else
14292 {
14293 if (!rtx_equal_p (operands[0], operands[1]))
14294 emit_move_insn (operands[0], operands[1]);
14295 emit_insn ((mode == DImode
14296 ? gen_x86_shrd_1
14297 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14298 emit_insn ((mode == DImode
14299 ? gen_ashrsi3
14300 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14301 }
14302 }
14303 else
14304 {
14305 if (!rtx_equal_p (operands[0], operands[1]))
14306 emit_move_insn (operands[0], operands[1]);
14307
14308 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14309
14310 emit_insn ((mode == DImode
14311 ? gen_x86_shrd_1
14312 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14313 emit_insn ((mode == DImode
14314 ? gen_ashrsi3
14315 : gen_ashrdi3) (high[0], high[0], operands[2]));
14316
14317 if (TARGET_CMOVE && scratch)
14318 {
14319 emit_move_insn (scratch, high[0]);
14320 emit_insn ((mode == DImode
14321 ? gen_ashrsi3
14322 : gen_ashrdi3) (scratch, scratch,
14323 GEN_INT (single_width - 1)));
14324 emit_insn ((mode == DImode
14325 ? gen_x86_shift_adj_1
14326 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14327 scratch));
14328 }
14329 else
14330 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14331 }
14332 }
14333
14334 void
14335 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14336 {
14337 rtx low[2], high[2];
14338 int count;
14339 const int single_width = mode == DImode ? 32 : 64;
14340
14341 if (CONST_INT_P (operands[2]))
14342 {
14343 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14344 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14345
14346 if (count >= single_width)
14347 {
14348 emit_move_insn (low[0], high[1]);
14349 ix86_expand_clear (high[0]);
14350
14351 if (count > single_width)
14352 emit_insn ((mode == DImode
14353 ? gen_lshrsi3
14354 : gen_lshrdi3) (low[0], low[0],
14355 GEN_INT (count - single_width)));
14356 }
14357 else
14358 {
14359 if (!rtx_equal_p (operands[0], operands[1]))
14360 emit_move_insn (operands[0], operands[1]);
14361 emit_insn ((mode == DImode
14362 ? gen_x86_shrd_1
14363 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14364 emit_insn ((mode == DImode
14365 ? gen_lshrsi3
14366 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14367 }
14368 }
14369 else
14370 {
14371 if (!rtx_equal_p (operands[0], operands[1]))
14372 emit_move_insn (operands[0], operands[1]);
14373
14374 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14375
14376 emit_insn ((mode == DImode
14377 ? gen_x86_shrd_1
14378 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14379 emit_insn ((mode == DImode
14380 ? gen_lshrsi3
14381 : gen_lshrdi3) (high[0], high[0], operands[2]));
14382
14383 /* Heh. By reversing the arguments, we can reuse this pattern. */
14384 if (TARGET_CMOVE && scratch)
14385 {
14386 ix86_expand_clear (scratch);
14387 emit_insn ((mode == DImode
14388 ? gen_x86_shift_adj_1
14389 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14390 scratch));
14391 }
14392 else
14393 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14394 }
14395 }
14396
14397 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
14398 static void
14399 predict_jump (int prob)
14400 {
14401 rtx insn = get_last_insn ();
14402 gcc_assert (JUMP_P (insn));
14403 REG_NOTES (insn)
14404 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14405 GEN_INT (prob),
14406 REG_NOTES (insn));
14407 }
14408
14409 /* Helper function for the string operations below.  Test VARIABLE whether
14410    it is aligned to VALUE bytes.  If it is, jump to the label. */
14411 static rtx
14412 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14413 {
14414 rtx label = gen_label_rtx ();
14415 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14416 if (GET_MODE (variable) == DImode)
14417 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14418 else
14419 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14420 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14421 1, label);
14422 if (epilogue)
14423 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14424 else
14425 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14426 return label;
14427 }
14428
14429 /* Adjust COUNTREG down by VALUE. */
14430 static void
14431 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14432 {
14433 if (GET_MODE (countreg) == DImode)
14434 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14435 else
14436 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14437 }
14438
14439 /* Zero-extend a possibly-SImode EXP to a Pmode register. */
14440 rtx
14441 ix86_zero_extend_to_Pmode (rtx exp)
14442 {
14443 rtx r;
14444 if (GET_MODE (exp) == VOIDmode)
14445 return force_reg (Pmode, exp);
14446 if (GET_MODE (exp) == Pmode)
14447 return copy_to_mode_reg (Pmode, exp);
14448 r = gen_reg_rtx (Pmode);
14449 emit_insn (gen_zero_extendsidi2 (r, exp));
14450 return r;
14451 }
14452
14453 /* Divide COUNTREG by SCALE. */
14454 static rtx
14455 scale_counter (rtx countreg, int scale)
14456 {
14457 rtx sc;
14458 rtx piece_size_mask;
14459
14460 if (scale == 1)
14461 return countreg;
14462 if (CONST_INT_P (countreg))
14463 return GEN_INT (INTVAL (countreg) / scale);
14464 gcc_assert (REG_P (countreg));
14465
14466 piece_size_mask = GEN_INT (scale - 1);
14467 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14468 GEN_INT (exact_log2 (scale)),
14469 NULL, 1, OPTAB_DIRECT);
14470 return sc;
14471 }
14472
14473 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14474 DImode for constant loop counts. */
14475
14476 static enum machine_mode
14477 counter_mode (rtx count_exp)
14478 {
14479 if (GET_MODE (count_exp) != VOIDmode)
14480 return GET_MODE (count_exp);
14481 if (GET_CODE (count_exp) != CONST_INT)
14482 return Pmode;
14483 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14484 return DImode;
14485 return SImode;
14486 }
14487
14488 /* When SRCPTR is non-NULL, output a simple loop to copy memory from
14489    SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
14490    overall size is COUNT bytes.  When SRCPTR is NULL, output the
14491    equivalent loop to set memory to VALUE (assumed to be in MODE).
14492
14493    The size is rounded down to a whole number of chunks moved at once.
14494    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
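
/* The emitted loop has roughly this shape (an illustrative sketch for
   the memset case with UNROLL == 1):

     size = count & -piece_size;
     iter = 0;
   top:
     dest[iter] = value;
     iter += piece_size;
     if (iter < size) goto top;
     destptr += iter;
 */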
14495
14496
14497 static void
14498 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14499 rtx destptr, rtx srcptr, rtx value,
14500 rtx count, enum machine_mode mode, int unroll,
14501 int expected_size)
14502 {
14503 rtx out_label, top_label, iter, tmp;
14504 enum machine_mode iter_mode = counter_mode (count);
14505 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14506 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14507 rtx size;
14508 rtx x_addr;
14509 rtx y_addr;
14510 int i;
14511
14512 top_label = gen_label_rtx ();
14513 out_label = gen_label_rtx ();
14514 iter = gen_reg_rtx (iter_mode);
14515
14516 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14517 NULL, 1, OPTAB_DIRECT);
14518 /* Those two should combine. */
14519 if (piece_size == const1_rtx)
14520 {
14521 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14522 true, out_label);
14523 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14524 }
14525 emit_move_insn (iter, const0_rtx);
14526
14527 emit_label (top_label);
14528
14529 tmp = convert_modes (Pmode, iter_mode, iter, true);
14530 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14531 destmem = change_address (destmem, mode, x_addr);
14532
14533 if (srcmem)
14534 {
14535 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14536 srcmem = change_address (srcmem, mode, y_addr);
14537
14538       /* When unrolling for chips that reorder memory reads and writes,
14539 	 we can save registers by using a single temporary.
14540 	 Also, using 4 temporaries is overkill in 32-bit mode. */
14541 if (!TARGET_64BIT && 0)
14542 {
14543 for (i = 0; i < unroll; i++)
14544 {
14545 if (i)
14546 {
14547 destmem =
14548 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14549 srcmem =
14550 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14551 }
14552 emit_move_insn (destmem, srcmem);
14553 }
14554 }
14555 else
14556 {
14557 rtx tmpreg[4];
14558 gcc_assert (unroll <= 4);
14559 for (i = 0; i < unroll; i++)
14560 {
14561 tmpreg[i] = gen_reg_rtx (mode);
14562 if (i)
14563 {
14564 srcmem =
14565 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14566 }
14567 emit_move_insn (tmpreg[i], srcmem);
14568 }
14569 for (i = 0; i < unroll; i++)
14570 {
14571 if (i)
14572 {
14573 destmem =
14574 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14575 }
14576 emit_move_insn (destmem, tmpreg[i]);
14577 }
14578 }
14579 }
14580 else
14581 for (i = 0; i < unroll; i++)
14582 {
14583 if (i)
14584 destmem =
14585 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14586 emit_move_insn (destmem, value);
14587 }
14588
14589 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14590 true, OPTAB_LIB_WIDEN);
14591 if (tmp != iter)
14592 emit_move_insn (iter, tmp);
14593
14594 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14595 true, top_label);
14596 if (expected_size != -1)
14597 {
14598 expected_size /= GET_MODE_SIZE (mode) * unroll;
14599 if (expected_size == 0)
14600 predict_jump (0);
14601 else if (expected_size > REG_BR_PROB_BASE)
14602 predict_jump (REG_BR_PROB_BASE - 1);
14603 else
14604 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14605 }
14606 else
14607 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14608 iter = ix86_zero_extend_to_Pmode (iter);
14609 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14610 true, OPTAB_LIB_WIDEN);
14611 if (tmp != destptr)
14612 emit_move_insn (destptr, tmp);
14613 if (srcptr)
14614 {
14615 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14616 true, OPTAB_LIB_WIDEN);
14617 if (tmp != srcptr)
14618 emit_move_insn (srcptr, tmp);
14619 }
14620 emit_label (out_label);
14621 }
14622
14623 /* Output "rep; mov" instruction.
14624 Arguments have same meaning as for previous function */
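
/* DESTEXP and SRCEXP computed below give the final values of the pointer
   registers, i.e. destptr/srcptr plus the number of bytes moved
   (countreg shifted by log2 of the chunk size); the rep_mov pattern uses
   them to describe its effect on those registers.  */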
14625 static void
14626 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14627 rtx destptr, rtx srcptr,
14628 rtx count,
14629 enum machine_mode mode)
14630 {
14631 rtx destexp;
14632 rtx srcexp;
14633 rtx countreg;
14634
14635   /* If the size is known and a multiple of 4, it is shorter to use SImode rep movs. */
14636 if (mode == QImode && CONST_INT_P (count)
14637 && !(INTVAL (count) & 3))
14638 mode = SImode;
14639
14640 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14641 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14642 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14643 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14644 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14645 if (mode != QImode)
14646 {
14647 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14648 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14649 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14650 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14651 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14652 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14653 }
14654 else
14655 {
14656 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14657 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14658 }
14659 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14660 destexp, srcexp));
14661 }
14662
14663 /* Output "rep; stos" instruction.
14664 Arguments have same meaning as for previous function */
14665 static void
14666 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14667 rtx count,
14668 enum machine_mode mode)
14669 {
14670 rtx destexp;
14671 rtx countreg;
14672
14673 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14674 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14675 value = force_reg (mode, gen_lowpart (mode, value));
14676 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14677 if (mode != QImode)
14678 {
14679 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14680 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14681 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14682 }
14683 else
14684 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14685 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14686 }
14687
14688 static void
14689 emit_strmov (rtx destmem, rtx srcmem,
14690 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14691 {
14692 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14693 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14694 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14695 }
14696
14697 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
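/* With a constant COUNT the residual is handled by testing its low bits;
   e.g. (illustrative) a residual of 7 bytes on a 32-bit target becomes
   one SImode, one HImode and one QImode move.  */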
14698 static void
14699 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14700 rtx destptr, rtx srcptr, rtx count, int max_size)
14701 {
14702 rtx src, dest;
14703 if (CONST_INT_P (count))
14704 {
14705 HOST_WIDE_INT countval = INTVAL (count);
14706 int offset = 0;
14707
14708 if ((countval & 0x10) && max_size > 16)
14709 {
14710 if (TARGET_64BIT)
14711 {
14712 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14713 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14714 }
14715 else
14716 gcc_unreachable ();
14717 offset += 16;
14718 }
14719 if ((countval & 0x08) && max_size > 8)
14720 {
14721 if (TARGET_64BIT)
14722 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14723 else
14724 {
14725 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14726 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14727 }
14728 offset += 8;
14729 }
14730 if ((countval & 0x04) && max_size > 4)
14731 {
14732 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14733 offset += 4;
14734 }
14735 if ((countval & 0x02) && max_size > 2)
14736 {
14737 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14738 offset += 2;
14739 }
14740 if ((countval & 0x01) && max_size > 1)
14741 {
14742 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14743 offset += 1;
14744 }
14745 return;
14746 }
14747 if (max_size > 8)
14748 {
14749 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14750 count, 1, OPTAB_DIRECT);
14751 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14752 count, QImode, 1, 4);
14753 return;
14754 }
14755
14756 /* When single (non-rep) string instructions are available (TARGET_SINGLE_STRINGOP),
14757 we can cheaply let them advance the dest and src pointers.  Otherwise we save
14758 code size by maintaining an offset register (zero is readily available from
14759 the preceding rep operation) and using x86 addressing modes.  */
14760 if (TARGET_SINGLE_STRINGOP)
14761 {
14762 if (max_size > 4)
14763 {
14764 rtx label = ix86_expand_aligntest (count, 4, true);
14765 src = change_address (srcmem, SImode, srcptr);
14766 dest = change_address (destmem, SImode, destptr);
14767 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14768 emit_label (label);
14769 LABEL_NUSES (label) = 1;
14770 }
14771 if (max_size > 2)
14772 {
14773 rtx label = ix86_expand_aligntest (count, 2, true);
14774 src = change_address (srcmem, HImode, srcptr);
14775 dest = change_address (destmem, HImode, destptr);
14776 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14777 emit_label (label);
14778 LABEL_NUSES (label) = 1;
14779 }
14780 if (max_size > 1)
14781 {
14782 rtx label = ix86_expand_aligntest (count, 1, true);
14783 src = change_address (srcmem, QImode, srcptr);
14784 dest = change_address (destmem, QImode, destptr);
14785 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14786 emit_label (label);
14787 LABEL_NUSES (label) = 1;
14788 }
14789 }
14790 else
14791 {
14792 rtx offset = force_reg (Pmode, const0_rtx);
14793 rtx tmp;
14794
14795 if (max_size > 4)
14796 {
14797 rtx label = ix86_expand_aligntest (count, 4, true);
14798 src = change_address (srcmem, SImode, srcptr);
14799 dest = change_address (destmem, SImode, destptr);
14800 emit_move_insn (dest, src);
14801 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14802 true, OPTAB_LIB_WIDEN);
14803 if (tmp != offset)
14804 emit_move_insn (offset, tmp);
14805 emit_label (label);
14806 LABEL_NUSES (label) = 1;
14807 }
14808 if (max_size > 2)
14809 {
14810 rtx label = ix86_expand_aligntest (count, 2, true);
14811 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14812 src = change_address (srcmem, HImode, tmp);
14813 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14814 dest = change_address (destmem, HImode, tmp);
14815 emit_move_insn (dest, src);
14816 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14817 true, OPTAB_LIB_WIDEN);
14818 if (tmp != offset)
14819 emit_move_insn (offset, tmp);
14820 emit_label (label);
14821 LABEL_NUSES (label) = 1;
14822 }
14823 if (max_size > 1)
14824 {
14825 rtx label = ix86_expand_aligntest (count, 1, true);
14826 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14827 src = change_address (srcmem, QImode, tmp);
14828 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14829 dest = change_address (destmem, QImode, tmp);
14830 emit_move_insn (dest, src);
14831 emit_label (label);
14832 LABEL_NUSES (label) = 1;
14833 }
14834 }
14835 }
14836
14837 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14838 static void
14839 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14840 rtx count, int max_size)
14841 {
14842 count =
14843 expand_simple_binop (counter_mode (count), AND, count,
14844 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14845 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14846 gen_lowpart (QImode, value), count, QImode,
14847 1, max_size / 2);
14848 }
14849
14850 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14851 static void
14852 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14853 {
14854 rtx dest;
14855
14856 if (CONST_INT_P (count))
14857 {
14858 HOST_WIDE_INT countval = INTVAL (count);
14859 int offset = 0;
14860
14861 if ((countval & 0x10) && max_size > 16)
14862 {
14863 if (TARGET_64BIT)
14864 {
14865 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14866 emit_insn (gen_strset (destptr, dest, value));
14867 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14868 emit_insn (gen_strset (destptr, dest, value));
14869 }
14870 else
14871 gcc_unreachable ();
14872 offset += 16;
14873 }
14874 if ((countval & 0x08) && max_size > 8)
14875 {
14876 if (TARGET_64BIT)
14877 {
14878 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14879 emit_insn (gen_strset (destptr, dest, value));
14880 }
14881 else
14882 {
14883 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14884 emit_insn (gen_strset (destptr, dest, value));
14885 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14886 emit_insn (gen_strset (destptr, dest, value));
14887 }
14888 offset += 8;
14889 }
14890 if ((countval & 0x04) && max_size > 4)
14891 {
14892 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14893 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14894 offset += 4;
14895 }
14896 if ((countval & 0x02) && max_size > 2)
14897 {
14898 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14899 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14900 offset += 2;
14901 }
14902 if ((countval & 0x01) && max_size > 1)
14903 {
14904 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14905 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14906 offset += 1;
14907 }
14908 return;
14909 }
14910 if (max_size > 32)
14911 {
14912 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14913 return;
14914 }
14915 if (max_size > 16)
14916 {
14917 rtx label = ix86_expand_aligntest (count, 16, true);
14918 if (TARGET_64BIT)
14919 {
14920 dest = change_address (destmem, DImode, destptr);
14921 emit_insn (gen_strset (destptr, dest, value));
14922 emit_insn (gen_strset (destptr, dest, value));
14923 }
14924 else
14925 {
14926 dest = change_address (destmem, SImode, destptr);
14927 emit_insn (gen_strset (destptr, dest, value));
14928 emit_insn (gen_strset (destptr, dest, value));
14929 emit_insn (gen_strset (destptr, dest, value));
14930 emit_insn (gen_strset (destptr, dest, value));
14931 }
14932 emit_label (label);
14933 LABEL_NUSES (label) = 1;
14934 }
14935 if (max_size > 8)
14936 {
14937 rtx label = ix86_expand_aligntest (count, 8, true);
14938 if (TARGET_64BIT)
14939 {
14940 dest = change_address (destmem, DImode, destptr);
14941 emit_insn (gen_strset (destptr, dest, value));
14942 }
14943 else
14944 {
14945 dest = change_address (destmem, SImode, destptr);
14946 emit_insn (gen_strset (destptr, dest, value));
14947 emit_insn (gen_strset (destptr, dest, value));
14948 }
14949 emit_label (label);
14950 LABEL_NUSES (label) = 1;
14951 }
14952 if (max_size > 4)
14953 {
14954 rtx label = ix86_expand_aligntest (count, 4, true);
14955 dest = change_address (destmem, SImode, destptr);
14956 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14957 emit_label (label);
14958 LABEL_NUSES (label) = 1;
14959 }
14960 if (max_size > 2)
14961 {
14962 rtx label = ix86_expand_aligntest (count, 2, true);
14963 dest = change_address (destmem, HImode, destptr);
14964 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14965 emit_label (label);
14966 LABEL_NUSES (label) = 1;
14967 }
14968 if (max_size > 1)
14969 {
14970 rtx label = ix86_expand_aligntest (count, 1, true);
14971 dest = change_address (destmem, QImode, destptr);
14972 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14973 emit_label (label);
14974 LABEL_NUSES (label) = 1;
14975 }
14976 }
14977
14978 /* Copy enough bytes from SRC to DEST to align DEST, which is known to be
14979 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
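/* For example (illustrative): with ALIGN of 1 and DESIRED_ALIGNMENT of 8,
   the code below emits up to three conditional copies -- a QImode, an
   HImode and an SImode one -- so at most 1 + 2 + 4 = 7 bytes are moved
   while the destination is brought to an 8-byte boundary.  */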
14980 static void
14981 expand_movmem_prologue (rtx destmem, rtx srcmem,
14982 rtx destptr, rtx srcptr, rtx count,
14983 int align, int desired_alignment)
14984 {
14985 if (align <= 1 && desired_alignment > 1)
14986 {
14987 rtx label = ix86_expand_aligntest (destptr, 1, false);
14988 srcmem = change_address (srcmem, QImode, srcptr);
14989 destmem = change_address (destmem, QImode, destptr);
14990 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14991 ix86_adjust_counter (count, 1);
14992 emit_label (label);
14993 LABEL_NUSES (label) = 1;
14994 }
14995 if (align <= 2 && desired_alignment > 2)
14996 {
14997 rtx label = ix86_expand_aligntest (destptr, 2, false);
14998 srcmem = change_address (srcmem, HImode, srcptr);
14999 destmem = change_address (destmem, HImode, destptr);
15000 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15001 ix86_adjust_counter (count, 2);
15002 emit_label (label);
15003 LABEL_NUSES (label) = 1;
15004 }
15005 if (align <= 4 && desired_alignment > 4)
15006 {
15007 rtx label = ix86_expand_aligntest (destptr, 4, false);
15008 srcmem = change_address (srcmem, SImode, srcptr);
15009 destmem = change_address (destmem, SImode, destptr);
15010 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15011 ix86_adjust_counter (count, 4);
15012 emit_label (label);
15013 LABEL_NUSES (label) = 1;
15014 }
15015 gcc_assert (desired_alignment <= 8);
15016 }
15017
15018 /* Set enough bytes at DEST to align DEST, which is known to be aligned to
15019 ALIGN, up to DESIRED_ALIGNMENT. */
15020 static void
15021 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15022 int align, int desired_alignment)
15023 {
15024 if (align <= 1 && desired_alignment > 1)
15025 {
15026 rtx label = ix86_expand_aligntest (destptr, 1, false);
15027 destmem = change_address (destmem, QImode, destptr);
15028 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15029 ix86_adjust_counter (count, 1);
15030 emit_label (label);
15031 LABEL_NUSES (label) = 1;
15032 }
15033 if (align <= 2 && desired_alignment > 2)
15034 {
15035 rtx label = ix86_expand_aligntest (destptr, 2, false);
15036 destmem = change_address (destmem, HImode, destptr);
15037 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15038 ix86_adjust_counter (count, 2);
15039 emit_label (label);
15040 LABEL_NUSES (label) = 1;
15041 }
15042 if (align <= 4 && desired_alignment > 4)
15043 {
15044 rtx label = ix86_expand_aligntest (destptr, 4, false);
15045 destmem = change_address (destmem, SImode, destptr);
15046 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15047 ix86_adjust_counter (count, 4);
15048 emit_label (label);
15049 LABEL_NUSES (label) = 1;
15050 }
15051 gcc_assert (desired_alignment <= 8);
15052 }
15053
15054 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
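/* For instance (illustrative): with -Os the function below simply returns
   rep_prefix_4_byte when the constant count is a nonzero multiple of four
   and rep_prefix_1_byte otherwise; with a known EXPECTED_SIZE it walks the
   per-CPU size table and returns the first algorithm whose max covers that
   size, subject to the TARGET_INLINE_ALL_STRINGOPS handling below.  */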
15055 static enum stringop_alg
15056 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15057 int *dynamic_check)
15058 {
15059 const struct stringop_algs * algs;
15060
15061 *dynamic_check = -1;
15062 if (memset)
15063 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15064 else
15065 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15066 if (stringop_alg != no_stringop)
15067 return stringop_alg;
15068 /* rep; movq or rep; movl is the smallest variant. */
15069 else if (optimize_size)
15070 {
15071 if (!count || (count & 3))
15072 return rep_prefix_1_byte;
15073 else
15074 return rep_prefix_4_byte;
15075 }
15076 /* Very tiny blocks are best handled via the loop; REP is expensive to set
15077 up.  */
15078 else if (expected_size != -1 && expected_size < 4)
15079 return loop_1_byte;
15080 else if (expected_size != -1)
15081 {
15082 unsigned int i;
15083 enum stringop_alg alg = libcall;
15084 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15085 {
15086 gcc_assert (algs->size[i].max);
15087 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15088 {
15089 if (algs->size[i].alg != libcall)
15090 alg = algs->size[i].alg;
15091 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15092 last non-libcall inline algorithm. */
15093 if (TARGET_INLINE_ALL_STRINGOPS)
15094 {
15095 /* When the current size is best copied by a libcall,
15096 but we are still forced to inline, run the heuristic below
15097 that will pick code for medium-sized blocks. */
15098 if (alg != libcall)
15099 return alg;
15100 break;
15101 }
15102 else
15103 return algs->size[i].alg;
15104 }
15105 }
15106 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
15107 }
15108 /* When asked to inline the call anyway, try to pick a meaningful choice.
15109 We look for the maximal size of block that is faster to copy by hand and
15110 take blocks of at most that size, guessing that the average size will
15111 be roughly half of the block.
15112
15113 If this turns out to be bad, we might simply specify the preferred
15114 choice in ix86_costs. */
15115 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15116 && algs->unknown_size == libcall)
15117 {
15118 int max = -1;
15119 enum stringop_alg alg;
15120 int i;
15121
15122 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15123 if (algs->size[i].alg != libcall && algs->size[i].alg)
15124 max = algs->size[i].max;
15125 if (max == -1)
15126 max = 4096;
15127 alg = decide_alg (count, max / 2, memset, dynamic_check);
15128 gcc_assert (*dynamic_check == -1);
15129 gcc_assert (alg != libcall);
15130 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15131 *dynamic_check = max;
15132 return alg;
15133 }
15134 return algs->unknown_size;
15135 }
15136
15137 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15138 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15139 static int
15140 decide_alignment (int align,
15141 enum stringop_alg alg,
15142 int expected_size)
15143 {
15144 int desired_align = 0;
15145 switch (alg)
15146 {
15147 case no_stringop:
15148 gcc_unreachable ();
15149 case loop:
15150 case unrolled_loop:
15151 desired_align = GET_MODE_SIZE (Pmode);
15152 break;
15153 case rep_prefix_8_byte:
15154 desired_align = 8;
15155 break;
15156 case rep_prefix_4_byte:
15157 /* PentiumPro has special logic that triggers for 8-byte aligned blocks,
15158 copying a whole cache line at once. */
15159 if (TARGET_PENTIUMPRO)
15160 desired_align = 8;
15161 else
15162 desired_align = 4;
15163 break;
15164 case rep_prefix_1_byte:
15165 /* PentiumPro has special logic that triggers for 8-byte aligned blocks,
15166 copying a whole cache line at once. */
15167 if (TARGET_PENTIUMPRO)
15168 desired_align = 8;
15169 else
15170 desired_align = 1;
15171 break;
15172 case loop_1_byte:
15173 desired_align = 1;
15174 break;
15175 case libcall:
15176 return 0;
15177 }
15178
15179 if (optimize_size)
15180 desired_align = 1;
15181 if (desired_align < align)
15182 desired_align = align;
15183 if (expected_size != -1 && expected_size < 4)
15184 desired_align = align;
15185 return desired_align;
15186 }
15187
15188 /* Return the smallest power of 2 greater than VAL. */
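/* For instance, smallest_pow2_greater_than (7) returns 8 and
   smallest_pow2_greater_than (8) returns 16; the result is strictly
   greater than VAL.  */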
15189 static int
15190 smallest_pow2_greater_than (int val)
15191 {
15192 int ret = 1;
15193 while (ret <= val)
15194 ret <<= 1;
15195 return ret;
15196 }
15197
15198 /* Expand string move (memcpy) operation. Use i386 string operations when
15199 profitable. ix86_expand_setmem contains similar code. The code depends upon
15200 architecture, block size and alignment, but always has the same
15201 overall structure:
15202
15203 1) Prologue guard: a conditional that jumps to the epilogue for small
15204 blocks that can be handled by the epilogue alone. This is faster, but
15205 also needed for correctness, since the prologue assumes the block is larger
15206 than the desired alignment.
15207
15208 An optional dynamic check for size, and a libcall for large
15209 blocks, is emitted here too, with -minline-stringops-dynamically.
15210
15211 2) Prologue: copy the first few bytes in order to get the destination aligned
15212 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15213 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15214 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
15215
15216 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15217 with the specified algorithm.
15218
15219 4) Epilogue: code copying the tail of the block that is too small to be
15220 handled by the main body (or up to the size guarded by the prologue guard). */
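/* Schematically, the emitted control flow is roughly (an illustrative
   sketch, not literal compiler output):

     if (count < epilogue_size_needed) goto epilogue;            step 1
     copy a few leading bytes until dest reaches desired_align;  step 2
     copy SIZE_NEEDED-byte chunks by loop or rep prefix;         step 3
   epilogue:
     copy the remaining count & (epilogue_size_needed - 1) bytes.   step 4  */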
15221
15222 int
15223 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15224 rtx expected_align_exp, rtx expected_size_exp)
15225 {
15226 rtx destreg;
15227 rtx srcreg;
15228 rtx label = NULL;
15229 rtx tmp;
15230 rtx jump_around_label = NULL;
15231 HOST_WIDE_INT align = 1;
15232 unsigned HOST_WIDE_INT count = 0;
15233 HOST_WIDE_INT expected_size = -1;
15234 int size_needed = 0, epilogue_size_needed;
15235 int desired_align = 0;
15236 enum stringop_alg alg;
15237 int dynamic_check;
15238
15239 if (CONST_INT_P (align_exp))
15240 align = INTVAL (align_exp);
15241 /* i386 can do misaligned access at a reasonable extra cost. */
15242 if (CONST_INT_P (expected_align_exp)
15243 && INTVAL (expected_align_exp) > align)
15244 align = INTVAL (expected_align_exp);
15245 if (CONST_INT_P (count_exp))
15246 count = expected_size = INTVAL (count_exp);
15247 if (CONST_INT_P (expected_size_exp) && count == 0)
15248 expected_size = INTVAL (expected_size_exp);
15249
15250 /* Step 0: Decide on preferred algorithm, desired alignment and
15251 size of chunks to be copied by main loop. */
15252
15253 alg = decide_alg (count, expected_size, false, &dynamic_check);
15254 desired_align = decide_alignment (align, alg, expected_size);
15255
15256 if (!TARGET_ALIGN_STRINGOPS)
15257 align = desired_align;
15258
15259 if (alg == libcall)
15260 return 0;
15261 gcc_assert (alg != no_stringop);
15262 if (!count)
15263 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15264 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15265 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15266 switch (alg)
15267 {
15268 case libcall:
15269 case no_stringop:
15270 gcc_unreachable ();
15271 case loop:
15272 size_needed = GET_MODE_SIZE (Pmode);
15273 break;
15274 case unrolled_loop:
15275 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15276 break;
15277 case rep_prefix_8_byte:
15278 size_needed = 8;
15279 break;
15280 case rep_prefix_4_byte:
15281 size_needed = 4;
15282 break;
15283 case rep_prefix_1_byte:
15284 case loop_1_byte:
15285 size_needed = 1;
15286 break;
15287 }
15288
15289 epilogue_size_needed = size_needed;
15290
15291 /* Step 1: Prologue guard. */
15292
15293 /* Alignment code needs count to be in register. */
15294 if (CONST_INT_P (count_exp) && desired_align > align)
15295 {
15296 enum machine_mode mode = SImode;
15297 if (TARGET_64BIT && (count & ~0xffffffff))
15298 mode = DImode;
15299 count_exp = force_reg (mode, count_exp);
15300 }
15301 gcc_assert (desired_align >= 1 && align >= 1);
15302
15303 /* Ensure that alignment prologue won't copy past end of block. */
15304 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15305 {
15306 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15307 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15308 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
15309 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15310
15311 label = gen_label_rtx ();
15312 emit_cmp_and_jump_insns (count_exp,
15313 GEN_INT (epilogue_size_needed),
15314 LTU, 0, counter_mode (count_exp), 1, label);
15315 if (GET_CODE (count_exp) == CONST_INT)
15316 ;
15317 else if (expected_size == -1 || expected_size < epilogue_size_needed)
15318 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15319 else
15320 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15321 }
15322 /* Emit code to decide on runtime whether library call or inline should be
15323 used. */
15324 if (dynamic_check != -1)
15325 {
15326 rtx hot_label = gen_label_rtx ();
15327 jump_around_label = gen_label_rtx ();
15328 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15329 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15330 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15331 emit_block_move_via_libcall (dst, src, count_exp, false);
15332 emit_jump (jump_around_label);
15333 emit_label (hot_label);
15334 }
15335
15336 /* Step 2: Alignment prologue. */
15337
15338 if (desired_align > align)
15339 {
15340 /* Except for the first move in the epilogue, we no longer know
15341 the constant offset in the aliasing info. It does not seem worth
15342 the pain to maintain it for the first move, so throw away
15343 the info early. */
15344 src = change_address (src, BLKmode, srcreg);
15345 dst = change_address (dst, BLKmode, destreg);
15346 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15347 desired_align);
15348 }
15349 if (label && size_needed == 1)
15350 {
15351 emit_label (label);
15352 LABEL_NUSES (label) = 1;
15353 label = NULL;
15354 }
15355
15356 /* Step 3: Main loop. */
15357
15358 switch (alg)
15359 {
15360 case libcall:
15361 case no_stringop:
15362 gcc_unreachable ();
15363 case loop_1_byte:
15364 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15365 count_exp, QImode, 1, expected_size);
15366 break;
15367 case loop:
15368 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15369 count_exp, Pmode, 1, expected_size);
15370 break;
15371 case unrolled_loop:
15372 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15373 registers for 4 temporaries anyway. */
15374 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15375 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15376 expected_size);
15377 break;
15378 case rep_prefix_8_byte:
15379 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15380 DImode);
15381 break;
15382 case rep_prefix_4_byte:
15383 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15384 SImode);
15385 break;
15386 case rep_prefix_1_byte:
15387 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15388 QImode);
15389 break;
15390 }
15391 /* Properly adjust the offsets of the src and dest memory for aliasing. */
15392 if (CONST_INT_P (count_exp))
15393 {
15394 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15395 (count / size_needed) * size_needed);
15396 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15397 (count / size_needed) * size_needed);
15398 }
15399 else
15400 {
15401 src = change_address (src, BLKmode, srcreg);
15402 dst = change_address (dst, BLKmode, destreg);
15403 }
15404
15405 /* Step 4: Epilogue to copy the remaining bytes. */
15406
15407 if (label)
15408 {
15409 /* When the main loop is done, COUNT_EXP might hold the original count,
15410 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15411 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15412 bytes. Compensate if needed. */
15413
15414 if (size_needed < epilogue_size_needed)
15415 {
15416 tmp =
15417 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15418 GEN_INT (size_needed - 1), count_exp, 1,
15419 OPTAB_DIRECT);
15420 if (tmp != count_exp)
15421 emit_move_insn (count_exp, tmp);
15422 }
15423 emit_label (label);
15424 LABEL_NUSES (label) = 1;
15425 }
15426
15427 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15428 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15429 epilogue_size_needed);
15430 if (jump_around_label)
15431 emit_label (jump_around_label);
15432 return 1;
15433 }
15434
15435 /* Helper function for memset. For a QImode value 0xXY produce
15436 0xXYXYXYXY of the width specified by MODE. This is essentially
15437 a * 0x01010101, but we can do slightly better than
15438 synth_mult by unwinding the sequence by hand on CPUs with
15439 a slow multiply. */
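/* A host-side C sketch of the slow-multiply path used below (illustrative
   only, not emitted code):

     unsigned int v = 0x5A;
     v |= v << 8;          now 0x00005A5A
     v |= v << 16;         now 0x5A5A5A5A

   whereas the multiply path simply computes 0x5A * 0x01010101.  */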
15440 static rtx
15441 promote_duplicated_reg (enum machine_mode mode, rtx val)
15442 {
15443 enum machine_mode valmode = GET_MODE (val);
15444 rtx tmp;
15445 int nops = mode == DImode ? 3 : 2;
15446
15447 gcc_assert (mode == SImode || mode == DImode);
15448 if (val == const0_rtx)
15449 return copy_to_mode_reg (mode, const0_rtx);
15450 if (CONST_INT_P (val))
15451 {
15452 HOST_WIDE_INT v = INTVAL (val) & 255;
15453
15454 v |= v << 8;
15455 v |= v << 16;
15456 if (mode == DImode)
15457 v |= (v << 16) << 16;
15458 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15459 }
15460
15461 if (valmode == VOIDmode)
15462 valmode = QImode;
15463 if (valmode != QImode)
15464 val = gen_lowpart (QImode, val);
15465 if (mode == QImode)
15466 return val;
15467 if (!TARGET_PARTIAL_REG_STALL)
15468 nops--;
15469 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15470 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15471 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15472 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15473 {
15474 rtx reg = convert_modes (mode, QImode, val, true);
15475 tmp = promote_duplicated_reg (mode, const1_rtx);
15476 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15477 OPTAB_DIRECT);
15478 }
15479 else
15480 {
15481 rtx reg = convert_modes (mode, QImode, val, true);
15482
15483 if (!TARGET_PARTIAL_REG_STALL)
15484 if (mode == SImode)
15485 emit_insn (gen_movsi_insv_1 (reg, reg));
15486 else
15487 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15488 else
15489 {
15490 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15491 NULL, 1, OPTAB_DIRECT);
15492 reg =
15493 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15494 }
15495 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15496 NULL, 1, OPTAB_DIRECT);
15497 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15498 if (mode == SImode)
15499 return reg;
15500 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15501 NULL, 1, OPTAB_DIRECT);
15502 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15503 return reg;
15504 }
15505 }
15506
15507 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
15508 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
15509 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
15510 static rtx
15511 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15512 {
15513 rtx promoted_val;
15514
15515 if (TARGET_64BIT
15516 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15517 promoted_val = promote_duplicated_reg (DImode, val);
15518 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15519 promoted_val = promote_duplicated_reg (SImode, val);
15520 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15521 promoted_val = promote_duplicated_reg (HImode, val);
15522 else
15523 promoted_val = val;
15524
15525 return promoted_val;
15526 }
15527
15528 /* Expand string set operation (memset). Use i386 string operations when
15529 profitable. See the ix86_expand_movmem comment for an explanation of the
15530 individual steps performed. */
15531 int
15532 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15533 rtx expected_align_exp, rtx expected_size_exp)
15534 {
15535 rtx destreg;
15536 rtx label = NULL;
15537 rtx tmp;
15538 rtx jump_around_label = NULL;
15539 HOST_WIDE_INT align = 1;
15540 unsigned HOST_WIDE_INT count = 0;
15541 HOST_WIDE_INT expected_size = -1;
15542 int size_needed = 0, epilogue_size_needed;
15543 int desired_align = 0;
15544 enum stringop_alg alg;
15545 rtx promoted_val = NULL;
15546 bool force_loopy_epilogue = false;
15547 int dynamic_check;
15548
15549 if (CONST_INT_P (align_exp))
15550 align = INTVAL (align_exp);
15551 /* i386 can do misaligned access at a reasonable extra cost. */
15552 if (CONST_INT_P (expected_align_exp)
15553 && INTVAL (expected_align_exp) > align)
15554 align = INTVAL (expected_align_exp);
15555 if (CONST_INT_P (count_exp))
15556 count = expected_size = INTVAL (count_exp);
15557 if (CONST_INT_P (expected_size_exp) && count == 0)
15558 expected_size = INTVAL (expected_size_exp);
15559
15560 /* Step 0: Decide on preferred algorithm, desired alignment and
15561 size of chunks to be copied by main loop. */
15562
15563 alg = decide_alg (count, expected_size, true, &dynamic_check);
15564 desired_align = decide_alignment (align, alg, expected_size);
15565
15566 if (!TARGET_ALIGN_STRINGOPS)
15567 align = desired_align;
15568
15569 if (alg == libcall)
15570 return 0;
15571 gcc_assert (alg != no_stringop);
15572 if (!count)
15573 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15574 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15575 switch (alg)
15576 {
15577 case libcall:
15578 case no_stringop:
15579 gcc_unreachable ();
15580 case loop:
15581 size_needed = GET_MODE_SIZE (Pmode);
15582 break;
15583 case unrolled_loop:
15584 size_needed = GET_MODE_SIZE (Pmode) * 4;
15585 break;
15586 case rep_prefix_8_byte:
15587 size_needed = 8;
15588 break;
15589 case rep_prefix_4_byte:
15590 size_needed = 4;
15591 break;
15592 case rep_prefix_1_byte:
15593 case loop_1_byte:
15594 size_needed = 1;
15595 break;
15596 }
15597 epilogue_size_needed = size_needed;
15598
15599 /* Step 1: Prologue guard. */
15600
15601 /* Alignment code needs count to be in register. */
15602 if (CONST_INT_P (count_exp) && desired_align > align)
15603 {
15604 enum machine_mode mode = SImode;
15605 if (TARGET_64BIT && (count & ~0xffffffff))
15606 mode = DImode;
15607 count_exp = force_reg (mode, count_exp);
15608 }
15609 /* Do the cheap promotion to allow better CSE across the
15610 main loop and epilogue (i.e. one load of the big constant in
15611 front of all the code). */
15612 if (CONST_INT_P (val_exp))
15613 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15614 desired_align, align);
15615 /* Ensure that alignment prologue won't copy past end of block. */
15616 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15617 {
15618 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15619 /* The epilogue always sets COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15620 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
15621 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15622
15623 /* To improve performance on small blocks, we jump around the VAL
15624 promoting code. This means that if the promoted VAL is not constant,
15625 we might not use it in the epilogue and have to fall back to the byte
15626 loop variant. */
15627 if (epilogue_size_needed > 2 && !promoted_val)
15628 force_loopy_epilogue = true;
15629 label = gen_label_rtx ();
15630 emit_cmp_and_jump_insns (count_exp,
15631 GEN_INT (epilogue_size_needed),
15632 LTU, 0, counter_mode (count_exp), 1, label);
15633 if (GET_CODE (count_exp) == CONST_INT)
15634 ;
15635 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15636 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15637 else
15638 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15639 }
15640 if (dynamic_check != -1)
15641 {
15642 rtx hot_label = gen_label_rtx ();
15643 jump_around_label = gen_label_rtx ();
15644 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15645 LEU, 0, counter_mode (count_exp), 1, hot_label);
15646 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15647 set_storage_via_libcall (dst, count_exp, val_exp, false);
15648 emit_jump (jump_around_label);
15649 emit_label (hot_label);
15650 }
15651
15652 /* Step 2: Alignment prologue. */
15653
15654 /* Do the expensive promotion once we have branched off the small blocks. */
15655 if (!promoted_val)
15656 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15657 desired_align, align);
15658 gcc_assert (desired_align >= 1 && align >= 1);
15659
15660 if (desired_align > align)
15661 {
15662 /* Except for the first move in the epilogue, we no longer know
15663 the constant offset in the aliasing info. It does not seem worth
15664 the pain to maintain it for the first move, so throw away
15665 the info early. */
15666 dst = change_address (dst, BLKmode, destreg);
15667 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15668 desired_align);
15669 }
15670 if (label && size_needed == 1)
15671 {
15672 emit_label (label);
15673 LABEL_NUSES (label) = 1;
15674 label = NULL;
15675 }
15676
15677 /* Step 3: Main loop. */
15678
15679 switch (alg)
15680 {
15681 case libcall:
15682 case no_stringop:
15683 gcc_unreachable ();
15684 case loop_1_byte:
15685 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15686 count_exp, QImode, 1, expected_size);
15687 break;
15688 case loop:
15689 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15690 count_exp, Pmode, 1, expected_size);
15691 break;
15692 case unrolled_loop:
15693 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15694 count_exp, Pmode, 4, expected_size);
15695 break;
15696 case rep_prefix_8_byte:
15697 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15698 DImode);
15699 break;
15700 case rep_prefix_4_byte:
15701 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15702 SImode);
15703 break;
15704 case rep_prefix_1_byte:
15705 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15706 QImode);
15707 break;
15708 }
15709 /* Properly adjust the offset of the dest memory for aliasing. */
15710 if (CONST_INT_P (count_exp))
15711 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15712 (count / size_needed) * size_needed);
15713 else
15714 dst = change_address (dst, BLKmode, destreg);
15715
15716 /* Step 4: Epilogue to copy the remaining bytes. */
15717
15718 if (label)
15719 {
15720 /* When the main loop is done, COUNT_EXP might hold the original count,
15721 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15722 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15723 bytes. Compensate if needed. */
15724
15725 if (size_needed < desired_align - align)
15726 {
15727 tmp =
15728 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15729 GEN_INT (size_needed - 1), count_exp, 1,
15730 OPTAB_DIRECT);
15731 size_needed = desired_align - align + 1;
15732 if (tmp != count_exp)
15733 emit_move_insn (count_exp, tmp);
15734 }
15735 emit_label (label);
15736 LABEL_NUSES (label) = 1;
15737 }
15738 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15739 {
15740 if (force_loopy_epilogue)
15741 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15742 size_needed);
15743 else
15744 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15745 size_needed);
15746 }
15747 if (jump_around_label)
15748 emit_label (jump_around_label);
15749 return 1;
15750 }
15751
15752 /* Expand the appropriate insns for doing strlen if not just doing
15753 repnz; scasb
15754
15755 out = result, initialized with the start address
15756 align_rtx = alignment of the address.
15757 scratch = scratch register, initialized with the start address when
15758 not aligned, otherwise undefined
15759
15760 This is just the body. It needs the initializations mentioned above and
15761 some address computing at the end. These things are done in i386.md. */
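/* Roughly (an illustrative sketch of the expansion below): compare up to
   three leading bytes one at a time until OUT is 4-byte aligned, then scan
   one SImode word per iteration using the zero-byte test described further
   down, and finally adjust OUT back to the exact terminating byte (a cmov
   or branch selects the halfword, a carry-based subtraction the byte).  */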
15762
15763 static void
15764 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15765 {
15766 int align;
15767 rtx tmp;
15768 rtx align_2_label = NULL_RTX;
15769 rtx align_3_label = NULL_RTX;
15770 rtx align_4_label = gen_label_rtx ();
15771 rtx end_0_label = gen_label_rtx ();
15772 rtx mem;
15773 rtx tmpreg = gen_reg_rtx (SImode);
15774 rtx scratch = gen_reg_rtx (SImode);
15775 rtx cmp;
15776
15777 align = 0;
15778 if (CONST_INT_P (align_rtx))
15779 align = INTVAL (align_rtx);
15780
15781 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15782
15783 /* Is there a known alignment and is it less than 4? */
15784 if (align < 4)
15785 {
15786 rtx scratch1 = gen_reg_rtx (Pmode);
15787 emit_move_insn (scratch1, out);
15788 /* Is there a known alignment and is it not 2? */
15789 if (align != 2)
15790 {
15791 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15792 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15793
15794 /* Leave just the 3 lower bits. */
15795 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15796 NULL_RTX, 0, OPTAB_WIDEN);
15797
15798 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15799 Pmode, 1, align_4_label);
15800 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15801 Pmode, 1, align_2_label);
15802 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15803 Pmode, 1, align_3_label);
15804 }
15805 else
15806 {
15807 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15808 check whether it is aligned to a 4-byte boundary. */
15809
15810 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15811 NULL_RTX, 0, OPTAB_WIDEN);
15812
15813 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15814 Pmode, 1, align_4_label);
15815 }
15816
15817 mem = change_address (src, QImode, out);
15818
15819 /* Now compare the bytes. */
15820
15821 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15822 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15823 QImode, 1, end_0_label);
15824
15825 /* Increment the address. */
15826 if (TARGET_64BIT)
15827 emit_insn (gen_adddi3 (out, out, const1_rtx));
15828 else
15829 emit_insn (gen_addsi3 (out, out, const1_rtx));
15830
15831 /* Not needed with an alignment of 2 */
15832 if (align != 2)
15833 {
15834 emit_label (align_2_label);
15835
15836 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15837 end_0_label);
15838
15839 if (TARGET_64BIT)
15840 emit_insn (gen_adddi3 (out, out, const1_rtx));
15841 else
15842 emit_insn (gen_addsi3 (out, out, const1_rtx));
15843
15844 emit_label (align_3_label);
15845 }
15846
15847 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15848 end_0_label);
15849
15850 if (TARGET_64BIT)
15851 emit_insn (gen_adddi3 (out, out, const1_rtx));
15852 else
15853 emit_insn (gen_addsi3 (out, out, const1_rtx));
15854 }
15855
15856 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15857 align this loop; doing so only bloats the code and does not make it
15858 any faster. */
15859 emit_label (align_4_label);
15860
15861 mem = change_address (src, SImode, out);
15862 emit_move_insn (scratch, mem);
15863 if (TARGET_64BIT)
15864 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15865 else
15866 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15867
15868 /* This formula yields a nonzero result iff one of the bytes is zero.
15869 This saves three branches inside the loop and many cycles. */
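/* Concretely (an illustrative example): the insns below compute
   (word - 0x01010101) & ~word & 0x80808080.  For word = 0x41004242 this
   yields 0x00800000, flagging the zero byte, while for word = 0x41414242
   every 0x80 test bit is cleared and the result is zero.  */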
15870
15871 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15872 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15873 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15874 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15875 gen_int_mode (0x80808080, SImode)));
15876 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15877 align_4_label);
15878
15879 if (TARGET_CMOVE)
15880 {
15881 rtx reg = gen_reg_rtx (SImode);
15882 rtx reg2 = gen_reg_rtx (Pmode);
15883 emit_move_insn (reg, tmpreg);
15884 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15885
15886 /* If zero is not in the first two bytes, move two bytes forward. */
15887 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15888 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15889 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15890 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15891 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15892 reg,
15893 tmpreg)));
15894 /* Emit lea manually to avoid clobbering of flags. */
15895 emit_insn (gen_rtx_SET (SImode, reg2,
15896 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15897
15898 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15899 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15900 emit_insn (gen_rtx_SET (VOIDmode, out,
15901 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15902 reg2,
15903 out)));
15904
15905 }
15906 else
15907 {
15908 rtx end_2_label = gen_label_rtx ();
15909 /* Is zero in the first two bytes? */
15910
15911 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15912 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15913 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15914 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15915 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15916 pc_rtx);
15917 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15918 JUMP_LABEL (tmp) = end_2_label;
15919
15920 /* Not in the first two. Move two bytes forward. */
15921 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15922 if (TARGET_64BIT)
15923 emit_insn (gen_adddi3 (out, out, const2_rtx));
15924 else
15925 emit_insn (gen_addsi3 (out, out, const2_rtx));
15926
15927 emit_label (end_2_label);
15928
15929 }
15930
15931 /* Avoid branch in fixing the byte. */
15932 tmpreg = gen_lowpart (QImode, tmpreg);
15933 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15934 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15935 if (TARGET_64BIT)
15936 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15937 else
15938 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15939
15940 emit_label (end_0_label);
15941 }
15942
15943 /* Expand strlen. */
15944
15945 int
15946 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15947 {
15948 rtx addr, scratch1, scratch2, scratch3, scratch4;
15949
15950 /* The generic case of the strlen expander is long. Avoid its
15951 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15952
15953 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15954 && !TARGET_INLINE_ALL_STRINGOPS
15955 && !optimize_size
15956 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15957 return 0;
15958
15959 addr = force_reg (Pmode, XEXP (src, 0));
15960 scratch1 = gen_reg_rtx (Pmode);
15961
15962 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15963 && !optimize_size)
15964 {
15965 /* It seems that some optimizer does not combine a call like
15966 foo(strlen(bar), strlen(bar));
15967 when the move and the subtraction are done here. It does calculate
15968 the length just once when these instructions are done inside
15969 output_strlen_unroll(). But since &bar[strlen(bar)] is
15970 often used and this uses one fewer register for the lifetime of
15971 output_strlen_unroll(), this is better. */
15972
15973 emit_move_insn (out, addr);
15974
15975 ix86_expand_strlensi_unroll_1 (out, src, align);
15976
15977 /* strlensi_unroll_1 returns the address of the zero at the end of
15978 the string, like memchr(), so compute the length by subtracting
15979 the start address. */
15980 if (TARGET_64BIT)
15981 emit_insn (gen_subdi3 (out, out, addr));
15982 else
15983 emit_insn (gen_subsi3 (out, out, addr));
15984 }
15985 else
15986 {
15987 rtx unspec;
15988 scratch2 = gen_reg_rtx (Pmode);
15989 scratch3 = gen_reg_rtx (Pmode);
15990 scratch4 = force_reg (Pmode, constm1_rtx);
15991
15992 emit_move_insn (scratch3, addr);
15993 eoschar = force_reg (QImode, eoschar);
15994
15995 src = replace_equiv_address_nv (src, scratch3);
15996
15997 /* If .md starts supporting :P, this can be done in .md. */
15998 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15999 scratch4), UNSPEC_SCAS);
16000 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16001 if (TARGET_64BIT)
16002 {
16003 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16004 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16005 }
16006 else
16007 {
16008 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16009 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16010 }
16011 }
16012 return 1;
16013 }
16014
16015 /* For a given symbol (function), construct code to compute the address of
16016 its PLT entry in the large x86-64 PIC model. */
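/* The sequence emitted below is roughly (illustrative; exact register
   choices depend on allocation):

     movabs $symbol@PLTOFF, %tmp
     add    <pic register>, %tmp

   i.e. the 64-bit PLT offset constant is added to the PIC base held in
   pic_offset_table_rtx.  */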
16017 rtx
16018 construct_plt_address (rtx symbol)
16019 {
16020 rtx tmp = gen_reg_rtx (Pmode);
16021 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16022
16023 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16024 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16025
16026 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16027 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16028 return tmp;
16029 }
16030
16031 void
16032 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16033 rtx callarg2 ATTRIBUTE_UNUSED,
16034 rtx pop, int sibcall)
16035 {
16036 rtx use = NULL, call;
16037
16038 if (pop == const0_rtx)
16039 pop = NULL;
16040 gcc_assert (!TARGET_64BIT || !pop);
16041
16042 if (TARGET_MACHO && !TARGET_64BIT)
16043 {
16044 #if TARGET_MACHO
16045 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16046 fnaddr = machopic_indirect_call_target (fnaddr);
16047 #endif
16048 }
16049 else
16050 {
16051 /* Static functions and indirect calls don't need the pic register. */
16052 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16053 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16054 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16055 use_reg (&use, pic_offset_table_rtx);
16056 }
16057
16058 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16059 {
16060 rtx al = gen_rtx_REG (QImode, 0);
16061 emit_move_insn (al, callarg2);
16062 use_reg (&use, al);
16063 }
16064
16065 if (ix86_cmodel == CM_LARGE_PIC
16066 && GET_CODE (fnaddr) == MEM
16067 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16068 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16069 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16070 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16071 {
16072 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16073 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16074 }
16075 if (sibcall && TARGET_64BIT
16076 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16077 {
16078 rtx addr;
16079 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16080 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16081 emit_move_insn (fnaddr, addr);
16082 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16083 }
16084
16085 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16086 if (retval)
16087 call = gen_rtx_SET (VOIDmode, retval, call);
16088 if (pop)
16089 {
16090 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16091 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16092 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16093 }
16094
16095 call = emit_call_insn (call);
16096 if (use)
16097 CALL_INSN_FUNCTION_USAGE (call) = use;
16098 }
16099
16100 \f
16101 /* Clear stack slot assignments remembered from previous functions.
16102 This is called from INIT_EXPANDERS once before RTL is emitted for each
16103 function. */
16104
16105 static struct machine_function *
16106 ix86_init_machine_status (void)
16107 {
16108 struct machine_function *f;
16109
16110 f = GGC_CNEW (struct machine_function);
16111 f->use_fast_prologue_epilogue_nregs = -1;
16112 f->tls_descriptor_call_expanded_p = 0;
16113
16114 return f;
16115 }
16116
16117 /* Return a MEM corresponding to a stack slot with mode MODE.
16118 Allocate a new slot if necessary.
16119
16120 The RTL for a function can have several slots available: N is
16121 which slot to use. */
16122
16123 rtx
16124 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16125 {
16126 struct stack_local_entry *s;
16127
16128 gcc_assert (n < MAX_386_STACK_LOCALS);
16129
16130 /* Virtual slot is valid only before vregs are instantiated. */
16131 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16132
16133 for (s = ix86_stack_locals; s; s = s->next)
16134 if (s->mode == mode && s->n == n)
16135 return copy_rtx (s->rtl);
16136
16137 s = (struct stack_local_entry *)
16138 ggc_alloc (sizeof (struct stack_local_entry));
16139 s->n = n;
16140 s->mode = mode;
16141 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16142
16143 s->next = ix86_stack_locals;
16144 ix86_stack_locals = s;
16145 return s->rtl;
16146 }
16147
16148 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16149
16150 static GTY(()) rtx ix86_tls_symbol;
16151 rtx
16152 ix86_tls_get_addr (void)
16153 {
16154
16155 if (!ix86_tls_symbol)
16156 {
16157 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16158 (TARGET_ANY_GNU_TLS
16159 && !TARGET_64BIT)
16160 ? "___tls_get_addr"
16161 : "__tls_get_addr");
16162 }
16163
16164 return ix86_tls_symbol;
16165 }
16166
16167 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16168
16169 static GTY(()) rtx ix86_tls_module_base_symbol;
16170 rtx
16171 ix86_tls_module_base (void)
16172 {
16173
16174 if (!ix86_tls_module_base_symbol)
16175 {
16176 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16177 "_TLS_MODULE_BASE_");
16178 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16179 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16180 }
16181
16182 return ix86_tls_module_base_symbol;
16183 }
16184 \f
16185 /* Calculate the length of the memory address in the instruction
16186 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16187
16188 int
16189 memory_address_length (rtx addr)
16190 {
16191 struct ix86_address parts;
16192 rtx base, index, disp;
16193 int len;
16194 int ok;
16195
16196 if (GET_CODE (addr) == PRE_DEC
16197 || GET_CODE (addr) == POST_INC
16198 || GET_CODE (addr) == PRE_MODIFY
16199 || GET_CODE (addr) == POST_MODIFY)
16200 return 0;
16201
16202 ok = ix86_decompose_address (addr, &parts);
16203 gcc_assert (ok);
16204
16205 if (parts.base && GET_CODE (parts.base) == SUBREG)
16206 parts.base = SUBREG_REG (parts.base);
16207 if (parts.index && GET_CODE (parts.index) == SUBREG)
16208 parts.index = SUBREG_REG (parts.index);
16209
16210 base = parts.base;
16211 index = parts.index;
16212 disp = parts.disp;
16213 len = 0;
16214
16215 /* Rule of thumb:
16216 - esp as the base always wants an index,
16217 - ebp as the base always wants a displacement. */
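/* For example (illustrative): (%eax) needs no extra bytes beyond the modrm,
   (%esp) needs a SIB byte, 8(%ebp) needs a one-byte displacement, a bare
   symbol needs a four-byte displacement, and sym(,%eax,4) needs a SIB byte
   plus a four-byte displacement.  */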
16218
16219 /* Register Indirect. */
16220 if (base && !index && !disp)
16221 {
16222 /* esp (for its index) and ebp (for its displacement) need
16223 the two-byte modrm form. */
16224 if (addr == stack_pointer_rtx
16225 || addr == arg_pointer_rtx
16226 || addr == frame_pointer_rtx
16227 || addr == hard_frame_pointer_rtx)
16228 len = 1;
16229 }
16230
16231 /* Direct Addressing. */
16232 else if (disp && !base && !index)
16233 len = 4;
16234
16235 else
16236 {
16237 /* Find the length of the displacement constant. */
16238 if (disp)
16239 {
16240 if (base && satisfies_constraint_K (disp))
16241 len = 1;
16242 else
16243 len = 4;
16244 }
16245 /* ebp always wants a displacement. */
16246 else if (base == hard_frame_pointer_rtx)
16247 len = 1;
16248
16249 /* An index requires the two-byte modrm form.... */
16250 if (index
16251 /* ...like esp, which always wants an index. */
16252 || base == stack_pointer_rtx
16253 || base == arg_pointer_rtx
16254 || base == frame_pointer_rtx)
16255 len += 1;
16256 }
16257
16258 return len;
16259 }
16260
16261 /* Compute the default value for the "length_immediate" attribute. When
16262 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
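/* For example (illustrative): "addl $100000, %eax" carries a 4-byte
   immediate (MODE_SI below), while with SHORTFORM set a small value such
   as 16 satisfies constraint K and the 8-bit immediate form costs only
   one byte.  */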
16263 int
16264 ix86_attr_length_immediate_default (rtx insn, int shortform)
16265 {
16266 int len = 0;
16267 int i;
16268 extract_insn_cached (insn);
16269 for (i = recog_data.n_operands - 1; i >= 0; --i)
16270 if (CONSTANT_P (recog_data.operand[i]))
16271 {
16272 gcc_assert (!len);
16273 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16274 len = 1;
16275 else
16276 {
16277 switch (get_attr_mode (insn))
16278 {
16279 case MODE_QI:
16280 len+=1;
16281 break;
16282 case MODE_HI:
16283 len+=2;
16284 break;
16285 case MODE_SI:
16286 len+=4;
16287 break;
16288 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
16289 case MODE_DI:
16290 len+=4;
16291 break;
16292 default:
16293 fatal_insn ("unknown insn mode", insn);
16294 }
16295 }
16296 }
16297 return len;
16298 }
16299 /* Compute default value for "length_address" attribute. */
16300 int
16301 ix86_attr_length_address_default (rtx insn)
16302 {
16303 int i;
16304
16305 if (get_attr_type (insn) == TYPE_LEA)
16306 {
16307 rtx set = PATTERN (insn);
16308
16309 if (GET_CODE (set) == PARALLEL)
16310 set = XVECEXP (set, 0, 0);
16311
16312 gcc_assert (GET_CODE (set) == SET);
16313
16314 return memory_address_length (SET_SRC (set));
16315 }
16316
16317 extract_insn_cached (insn);
16318 for (i = recog_data.n_operands - 1; i >= 0; --i)
16319 if (MEM_P (recog_data.operand[i]))
16320 {
16321 return memory_address_length (XEXP (recog_data.operand[i], 0));
16322 break;
16323 }
16324 return 0;
16325 }
16326 \f
16327 /* Return the maximum number of instructions a cpu can issue. */
16328
16329 static int
16330 ix86_issue_rate (void)
16331 {
16332 switch (ix86_tune)
16333 {
16334 case PROCESSOR_PENTIUM:
16335 case PROCESSOR_K6:
16336 return 2;
16337
16338 case PROCESSOR_PENTIUMPRO:
16339 case PROCESSOR_PENTIUM4:
16340 case PROCESSOR_ATHLON:
16341 case PROCESSOR_K8:
16342 case PROCESSOR_AMDFAM10:
16343 case PROCESSOR_NOCONA:
16344 case PROCESSOR_GENERIC32:
16345 case PROCESSOR_GENERIC64:
16346 return 3;
16347
16348 case PROCESSOR_CORE2:
16349 return 4;
16350
16351 default:
16352 return 1;
16353 }
16354 }
16355
16356 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16357 by DEP_INSN and nothing else set by DEP_INSN. */
16358
16359 static int
16360 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16361 {
16362 rtx set, set2;
16363
16364 /* Simplify the test for uninteresting insns. */
16365 if (insn_type != TYPE_SETCC
16366 && insn_type != TYPE_ICMOV
16367 && insn_type != TYPE_FCMOV
16368 && insn_type != TYPE_IBR)
16369 return 0;
16370
16371 if ((set = single_set (dep_insn)) != 0)
16372 {
16373 set = SET_DEST (set);
16374 set2 = NULL_RTX;
16375 }
16376 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16377 && XVECLEN (PATTERN (dep_insn), 0) == 2
16378 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16379 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16380 {
16381 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16382 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16383 }
16384 else
16385 return 0;
16386
16387 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16388 return 0;
16389
16390 /* This test is true if the dependent insn reads the flags but
16391 not any other potentially set register. */
16392 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16393 return 0;
16394
16395 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16396 return 0;
16397
16398 return 1;
16399 }
16400
16401 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16402 address with operands set by DEP_INSN. */
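/* For example (illustrative): on the Pentium, a load such as
   "movl (%eax), %ebx" issued right after "addl $4, %eax" hits the address
   generation interlock; ix86_adjust_cost below models this by adding one
   cycle to the dependence cost.  */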
16403
16404 static int
16405 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16406 {
16407 rtx addr;
16408
16409 if (insn_type == TYPE_LEA
16410 && TARGET_PENTIUM)
16411 {
16412 addr = PATTERN (insn);
16413
16414 if (GET_CODE (addr) == PARALLEL)
16415 addr = XVECEXP (addr, 0, 0);
16416
16417 gcc_assert (GET_CODE (addr) == SET);
16418
16419 addr = SET_SRC (addr);
16420 }
16421 else
16422 {
16423 int i;
16424 extract_insn_cached (insn);
16425 for (i = recog_data.n_operands - 1; i >= 0; --i)
16426 if (MEM_P (recog_data.operand[i]))
16427 {
16428 addr = XEXP (recog_data.operand[i], 0);
16429 goto found;
16430 }
16431 return 0;
16432 found:;
16433 }
16434
16435 return modified_in_p (addr, dep_insn);
16436 }
16437
16438 static int
16439 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16440 {
16441 enum attr_type insn_type, dep_insn_type;
16442 enum attr_memory memory;
16443 rtx set, set2;
16444 int dep_insn_code_number;
16445
16446 /* Anti and output dependencies have zero cost on all CPUs. */
16447 if (REG_NOTE_KIND (link) != 0)
16448 return 0;
16449
16450 dep_insn_code_number = recog_memoized (dep_insn);
16451
16452 /* If we can't recognize the insns, we can't really do anything. */
16453 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16454 return cost;
16455
16456 insn_type = get_attr_type (insn);
16457 dep_insn_type = get_attr_type (dep_insn);
16458
16459 switch (ix86_tune)
16460 {
16461 case PROCESSOR_PENTIUM:
16462 /* Address Generation Interlock adds a cycle of latency. */
16463 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16464 cost += 1;
16465
16466 /* ??? Compares pair with jump/setcc. */
16467 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16468 cost = 0;
16469
16470 /* Floating point stores require value to be ready one cycle earlier. */
16471 if (insn_type == TYPE_FMOV
16472 && get_attr_memory (insn) == MEMORY_STORE
16473 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16474 cost += 1;
16475 break;
16476
16477 case PROCESSOR_PENTIUMPRO:
16478 memory = get_attr_memory (insn);
16479
16480 /* INT->FP conversion is expensive. */
16481 if (get_attr_fp_int_src (dep_insn))
16482 cost += 5;
16483
16484 /* There is one extra cycle of latency between an FP op and a store. */
16485 if (insn_type == TYPE_FMOV
16486 && (set = single_set (dep_insn)) != NULL_RTX
16487 && (set2 = single_set (insn)) != NULL_RTX
16488 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16489 && MEM_P (SET_DEST (set2)))
16490 cost += 1;
16491
16492 /* Show the ability of the reorder buffer to hide the latency of a load
16493 by executing it in parallel with the previous instruction, provided the
16494 previous instruction is not needed to compute the address. */
16495 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16496 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16497 {
16498 /* Claim that moves take one cycle, as the core can issue one load
16499 at a time and the next load can start a cycle later. */
16500 if (dep_insn_type == TYPE_IMOV
16501 || dep_insn_type == TYPE_FMOV)
16502 cost = 1;
16503 else if (cost > 1)
16504 cost--;
16505 }
16506 break;
16507
16508 case PROCESSOR_K6:
16509 memory = get_attr_memory (insn);
16510
16511 /* The esp dependency is resolved before the instruction is really
16512 finished. */
16513 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16514 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16515 return 1;
16516
16517 /* INT->FP conversion is expensive. */
16518 if (get_attr_fp_int_src (dep_insn))
16519 cost += 5;
16520
16521 /* Show the ability of the reorder buffer to hide the latency of a load
16522 by executing it in parallel with the previous instruction, provided the
16523 previous instruction is not needed to compute the address. */
16524 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16525 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16526 {
16527 /* Claim that moves take one cycle, as the core can issue one load
16528 at a time and the next load can start a cycle later. */
16529 if (dep_insn_type == TYPE_IMOV
16530 || dep_insn_type == TYPE_FMOV)
16531 cost = 1;
16532 else if (cost > 2)
16533 cost -= 2;
16534 else
16535 cost = 1;
16536 }
16537 break;
16538
16539 case PROCESSOR_ATHLON:
16540 case PROCESSOR_K8:
16541 case PROCESSOR_AMDFAM10:
16542 case PROCESSOR_GENERIC32:
16543 case PROCESSOR_GENERIC64:
16544 memory = get_attr_memory (insn);
16545
16546 /* Show the ability of the reorder buffer to hide the latency of a load
16547 by executing it in parallel with the previous instruction, provided the
16548 previous instruction is not needed to compute the address. */
16549 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16550 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16551 {
16552 enum attr_unit unit = get_attr_unit (insn);
16553 int loadcost = 3;
16554
16555 /* Because of the difference between the length of integer and
16556 floating unit pipeline preparation stages, the memory operands
16557 for floating point are cheaper.
16558
16559 ??? For Athlon the difference is most probably 2. */
16560 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16561 loadcost = 3;
16562 else
16563 loadcost = TARGET_ATHLON ? 2 : 0;
16564
16565 if (cost >= loadcost)
16566 cost -= loadcost;
16567 else
16568 cost = 0;
16569 }
16570
16571 default:
16572 break;
16573 }
16574
16575 return cost;
16576 }
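/* Worked example for the Athlon/K8 case above: a load-execute instruction
   whose memory address does not depend on the producing insn can start its
   load while the producer is still in flight, so with an integer loadcost
   of 3 a recorded dependence cost of 5 is reduced to 2; only the remaining
   execution latency stays visible to the scheduler.  */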
16577
16578 /* How many alternative schedules to try. This should be as wide as the
16579 scheduling freedom in the DFA, but no wider. Making this value too
16580 large results in extra work for the scheduler. */
16581
16582 static int
16583 ia32_multipass_dfa_lookahead (void)
16584 {
16585 if (ix86_tune == PROCESSOR_PENTIUM)
16586 return 2;
16587
16588 if (ix86_tune == PROCESSOR_PENTIUMPRO
16589 || ix86_tune == PROCESSOR_K6)
16590 return 1;
16591
16592 else
16593 return 0;
16594 }
16595
16596 \f
16597 /* Compute the alignment given to a constant that is being placed in memory.
16598 EXP is the constant and ALIGN is the alignment that the object would
16599 ordinarily have.
16600 The value of this function is used instead of that alignment to align
16601 the object. */
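/* For example, a DFmode (double) constant that would otherwise get only
   32-bit alignment is placed on a 64-bit boundary, and constants whose
   mode satisfies ALIGN_MODE_128 (e.g. vector constants) on a 128-bit
   boundary, so loads of the constant never straddle an unnecessary
   boundary.  Long string constants are bumped to word alignment when not
   optimizing for size.  */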
16602
16603 int
16604 ix86_constant_alignment (tree exp, int align)
16605 {
16606 if (TREE_CODE (exp) == REAL_CST)
16607 {
16608 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16609 return 64;
16610 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16611 return 128;
16612 }
16613 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16614 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16615 return BITS_PER_WORD;
16616
16617 return align;
16618 }
16619
16620 /* Compute the alignment for a static variable.
16621 TYPE is the data type, and ALIGN is the alignment that
16622 the object would ordinarily have. The value of this function is used
16623 instead of that alignment to align the object. */
16624
16625 int
16626 ix86_data_alignment (tree type, int align)
16627 {
16628 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16629
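/* Boost the alignment of large aggregates (32 bytes or more with the
   default max_align of 256 bits) purely as an optimization; unlike the
   x86-64 ABI rule below, nothing relies on this for correctness.  */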
16630 if (AGGREGATE_TYPE_P (type)
16631 && TYPE_SIZE (type)
16632 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16633 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16634 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16635 && align < max_align)
16636 align = max_align;
16637
16638 /* x86-64 ABI requires arrays of 16 bytes or more to be aligned
16639 to a 16-byte boundary. */
16640 if (TARGET_64BIT)
16641 {
16642 if (AGGREGATE_TYPE_P (type)
16643 && TYPE_SIZE (type)
16644 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16645 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16646 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16647 return 128;
16648 }
16649
16650 if (TREE_CODE (type) == ARRAY_TYPE)
16651 {
16652 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16653 return 64;
16654 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16655 return 128;
16656 }
16657 else if (TREE_CODE (type) == COMPLEX_TYPE)
16658 {
16659
16660 if (TYPE_MODE (type) == DCmode && align < 64)
16661 return 64;
16662 if (TYPE_MODE (type) == XCmode && align < 128)
16663 return 128;
16664 }
16665 else if ((TREE_CODE (type) == RECORD_TYPE
16666 || TREE_CODE (type) == UNION_TYPE
16667 || TREE_CODE (type) == QUAL_UNION_TYPE)
16668 && TYPE_FIELDS (type))
16669 {
16670 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16671 return 64;
16672 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16673 return 128;
16674 }
16675 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16676 || TREE_CODE (type) == INTEGER_TYPE)
16677 {
16678 if (TYPE_MODE (type) == DFmode && align < 64)
16679 return 64;
16680 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16681 return 128;
16682 }
16683
16684 return align;
16685 }
16686
16687 /* Compute the alignment for a local variable.
16688 TYPE is the data type, and ALIGN is the alignment that
16689 the object would ordinarily have. The value of this macro is used
16690 instead of that alignment to align the object. */
16691
16692 int
16693 ix86_local_alignment (tree type, int align)
16694 {
16695 /* x86-64 ABI requires arrays of 16 bytes or more to be aligned
16696 to a 16-byte boundary. */
16697 if (TARGET_64BIT)
16698 {
16699 if (AGGREGATE_TYPE_P (type)
16700 && TYPE_SIZE (type)
16701 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16702 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16703 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16704 return 128;
16705 }
16706 if (TREE_CODE (type) == ARRAY_TYPE)
16707 {
16708 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16709 return 64;
16710 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16711 return 128;
16712 }
16713 else if (TREE_CODE (type) == COMPLEX_TYPE)
16714 {
16715 if (TYPE_MODE (type) == DCmode && align < 64)
16716 return 64;
16717 if (TYPE_MODE (type) == XCmode && align < 128)
16718 return 128;
16719 }
16720 else if ((TREE_CODE (type) == RECORD_TYPE
16721 || TREE_CODE (type) == UNION_TYPE
16722 || TREE_CODE (type) == QUAL_UNION_TYPE)
16723 && TYPE_FIELDS (type))
16724 {
16725 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16726 return 64;
16727 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16728 return 128;
16729 }
16730 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16731 || TREE_CODE (type) == INTEGER_TYPE)
16732 {
16733
16734 if (TYPE_MODE (type) == DFmode && align < 64)
16735 return 64;
16736 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16737 return 128;
16738 }
16739 return align;
16740 }
16741 \f
16742 /* Emit RTL insns to initialize the variable parts of a trampoline.
16743 FNADDR is an RTX for the address of the function's pure code.
16744 CXT is an RTX for the static chain value for the function. */
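/* For reference, the byte sequences emitted below decode as follows.
   32-bit trampoline (10 bytes):

       b9 <cxt>         movl  $cxt, %ecx         static chain in %ecx
       e9 <disp>        jmp   fnaddr             rel32 from end of the jmp

   64-bit trampoline:

       41 bb <imm32>    movl  $fnaddr, %r11d     (or 49 bb <imm64>, movabs)
       49 ba <imm64>    movabs $cxt, %r10        static chain in %r10
       49 ff e3         jmp   *%r11  */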
16745 void
16746 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16747 {
16748 if (!TARGET_64BIT)
16749 {
16750 /* Compute offset from the end of the jmp to the target function. */
16751 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16752 plus_constant (tramp, 10),
16753 NULL_RTX, 1, OPTAB_DIRECT);
16754 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16755 gen_int_mode (0xb9, QImode));
16756 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16757 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16758 gen_int_mode (0xe9, QImode));
16759 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16760 }
16761 else
16762 {
16763 int offset = 0;
16764 /* Try to load the address using a shorter movl instead of movabs.
16765 We may want to support movq for kernel mode, but the kernel does not use
16766 trampolines at the moment. */
16767 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16768 {
16769 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16770 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16771 gen_int_mode (0xbb41, HImode));
16772 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16773 gen_lowpart (SImode, fnaddr));
16774 offset += 6;
16775 }
16776 else
16777 {
16778 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16779 gen_int_mode (0xbb49, HImode));
16780 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16781 fnaddr);
16782 offset += 10;
16783 }
16784 /* Load static chain using movabs to r10. */
16785 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16786 gen_int_mode (0xba49, HImode));
16787 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16788 cxt);
16789 offset += 10;
16790 /* Jump to r11. */
16791 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16792 gen_int_mode (0xff49, HImode));
16793 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16794 gen_int_mode (0xe3, QImode));
16795 offset += 3;
16796 gcc_assert (offset <= TRAMPOLINE_SIZE);
16797 }
16798
16799 #ifdef ENABLE_EXECUTE_STACK
16800 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16801 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16802 #endif
16803 }
16804 \f
16805 /* Codes for all the SSE/MMX builtins. */
16806 enum ix86_builtins
16807 {
16808 IX86_BUILTIN_ADDPS,
16809 IX86_BUILTIN_ADDSS,
16810 IX86_BUILTIN_DIVPS,
16811 IX86_BUILTIN_DIVSS,
16812 IX86_BUILTIN_MULPS,
16813 IX86_BUILTIN_MULSS,
16814 IX86_BUILTIN_SUBPS,
16815 IX86_BUILTIN_SUBSS,
16816
16817 IX86_BUILTIN_CMPEQPS,
16818 IX86_BUILTIN_CMPLTPS,
16819 IX86_BUILTIN_CMPLEPS,
16820 IX86_BUILTIN_CMPGTPS,
16821 IX86_BUILTIN_CMPGEPS,
16822 IX86_BUILTIN_CMPNEQPS,
16823 IX86_BUILTIN_CMPNLTPS,
16824 IX86_BUILTIN_CMPNLEPS,
16825 IX86_BUILTIN_CMPNGTPS,
16826 IX86_BUILTIN_CMPNGEPS,
16827 IX86_BUILTIN_CMPORDPS,
16828 IX86_BUILTIN_CMPUNORDPS,
16829 IX86_BUILTIN_CMPEQSS,
16830 IX86_BUILTIN_CMPLTSS,
16831 IX86_BUILTIN_CMPLESS,
16832 IX86_BUILTIN_CMPNEQSS,
16833 IX86_BUILTIN_CMPNLTSS,
16834 IX86_BUILTIN_CMPNLESS,
16835 IX86_BUILTIN_CMPNGTSS,
16836 IX86_BUILTIN_CMPNGESS,
16837 IX86_BUILTIN_CMPORDSS,
16838 IX86_BUILTIN_CMPUNORDSS,
16839
16840 IX86_BUILTIN_COMIEQSS,
16841 IX86_BUILTIN_COMILTSS,
16842 IX86_BUILTIN_COMILESS,
16843 IX86_BUILTIN_COMIGTSS,
16844 IX86_BUILTIN_COMIGESS,
16845 IX86_BUILTIN_COMINEQSS,
16846 IX86_BUILTIN_UCOMIEQSS,
16847 IX86_BUILTIN_UCOMILTSS,
16848 IX86_BUILTIN_UCOMILESS,
16849 IX86_BUILTIN_UCOMIGTSS,
16850 IX86_BUILTIN_UCOMIGESS,
16851 IX86_BUILTIN_UCOMINEQSS,
16852
16853 IX86_BUILTIN_CVTPI2PS,
16854 IX86_BUILTIN_CVTPS2PI,
16855 IX86_BUILTIN_CVTSI2SS,
16856 IX86_BUILTIN_CVTSI642SS,
16857 IX86_BUILTIN_CVTSS2SI,
16858 IX86_BUILTIN_CVTSS2SI64,
16859 IX86_BUILTIN_CVTTPS2PI,
16860 IX86_BUILTIN_CVTTSS2SI,
16861 IX86_BUILTIN_CVTTSS2SI64,
16862
16863 IX86_BUILTIN_MAXPS,
16864 IX86_BUILTIN_MAXSS,
16865 IX86_BUILTIN_MINPS,
16866 IX86_BUILTIN_MINSS,
16867
16868 IX86_BUILTIN_LOADUPS,
16869 IX86_BUILTIN_STOREUPS,
16870 IX86_BUILTIN_MOVSS,
16871
16872 IX86_BUILTIN_MOVHLPS,
16873 IX86_BUILTIN_MOVLHPS,
16874 IX86_BUILTIN_LOADHPS,
16875 IX86_BUILTIN_LOADLPS,
16876 IX86_BUILTIN_STOREHPS,
16877 IX86_BUILTIN_STORELPS,
16878
16879 IX86_BUILTIN_MASKMOVQ,
16880 IX86_BUILTIN_MOVMSKPS,
16881 IX86_BUILTIN_PMOVMSKB,
16882
16883 IX86_BUILTIN_MOVNTPS,
16884 IX86_BUILTIN_MOVNTQ,
16885
16886 IX86_BUILTIN_LOADDQU,
16887 IX86_BUILTIN_STOREDQU,
16888
16889 IX86_BUILTIN_PACKSSWB,
16890 IX86_BUILTIN_PACKSSDW,
16891 IX86_BUILTIN_PACKUSWB,
16892
16893 IX86_BUILTIN_PADDB,
16894 IX86_BUILTIN_PADDW,
16895 IX86_BUILTIN_PADDD,
16896 IX86_BUILTIN_PADDQ,
16897 IX86_BUILTIN_PADDSB,
16898 IX86_BUILTIN_PADDSW,
16899 IX86_BUILTIN_PADDUSB,
16900 IX86_BUILTIN_PADDUSW,
16901 IX86_BUILTIN_PSUBB,
16902 IX86_BUILTIN_PSUBW,
16903 IX86_BUILTIN_PSUBD,
16904 IX86_BUILTIN_PSUBQ,
16905 IX86_BUILTIN_PSUBSB,
16906 IX86_BUILTIN_PSUBSW,
16907 IX86_BUILTIN_PSUBUSB,
16908 IX86_BUILTIN_PSUBUSW,
16909
16910 IX86_BUILTIN_PAND,
16911 IX86_BUILTIN_PANDN,
16912 IX86_BUILTIN_POR,
16913 IX86_BUILTIN_PXOR,
16914
16915 IX86_BUILTIN_PAVGB,
16916 IX86_BUILTIN_PAVGW,
16917
16918 IX86_BUILTIN_PCMPEQB,
16919 IX86_BUILTIN_PCMPEQW,
16920 IX86_BUILTIN_PCMPEQD,
16921 IX86_BUILTIN_PCMPGTB,
16922 IX86_BUILTIN_PCMPGTW,
16923 IX86_BUILTIN_PCMPGTD,
16924
16925 IX86_BUILTIN_PMADDWD,
16926
16927 IX86_BUILTIN_PMAXSW,
16928 IX86_BUILTIN_PMAXUB,
16929 IX86_BUILTIN_PMINSW,
16930 IX86_BUILTIN_PMINUB,
16931
16932 IX86_BUILTIN_PMULHUW,
16933 IX86_BUILTIN_PMULHW,
16934 IX86_BUILTIN_PMULLW,
16935
16936 IX86_BUILTIN_PSADBW,
16937 IX86_BUILTIN_PSHUFW,
16938
16939 IX86_BUILTIN_PSLLW,
16940 IX86_BUILTIN_PSLLD,
16941 IX86_BUILTIN_PSLLQ,
16942 IX86_BUILTIN_PSRAW,
16943 IX86_BUILTIN_PSRAD,
16944 IX86_BUILTIN_PSRLW,
16945 IX86_BUILTIN_PSRLD,
16946 IX86_BUILTIN_PSRLQ,
16947 IX86_BUILTIN_PSLLWI,
16948 IX86_BUILTIN_PSLLDI,
16949 IX86_BUILTIN_PSLLQI,
16950 IX86_BUILTIN_PSRAWI,
16951 IX86_BUILTIN_PSRADI,
16952 IX86_BUILTIN_PSRLWI,
16953 IX86_BUILTIN_PSRLDI,
16954 IX86_BUILTIN_PSRLQI,
16955
16956 IX86_BUILTIN_PUNPCKHBW,
16957 IX86_BUILTIN_PUNPCKHWD,
16958 IX86_BUILTIN_PUNPCKHDQ,
16959 IX86_BUILTIN_PUNPCKLBW,
16960 IX86_BUILTIN_PUNPCKLWD,
16961 IX86_BUILTIN_PUNPCKLDQ,
16962
16963 IX86_BUILTIN_SHUFPS,
16964
16965 IX86_BUILTIN_RCPPS,
16966 IX86_BUILTIN_RCPSS,
16967 IX86_BUILTIN_RSQRTPS,
16968 IX86_BUILTIN_RSQRTSS,
16969 IX86_BUILTIN_RSQRTF,
16970 IX86_BUILTIN_SQRTPS,
16971 IX86_BUILTIN_SQRTSS,
16972
16973 IX86_BUILTIN_UNPCKHPS,
16974 IX86_BUILTIN_UNPCKLPS,
16975
16976 IX86_BUILTIN_ANDPS,
16977 IX86_BUILTIN_ANDNPS,
16978 IX86_BUILTIN_ORPS,
16979 IX86_BUILTIN_XORPS,
16980
16981 IX86_BUILTIN_EMMS,
16982 IX86_BUILTIN_LDMXCSR,
16983 IX86_BUILTIN_STMXCSR,
16984 IX86_BUILTIN_SFENCE,
16985
16986 /* 3DNow! Original */
16987 IX86_BUILTIN_FEMMS,
16988 IX86_BUILTIN_PAVGUSB,
16989 IX86_BUILTIN_PF2ID,
16990 IX86_BUILTIN_PFACC,
16991 IX86_BUILTIN_PFADD,
16992 IX86_BUILTIN_PFCMPEQ,
16993 IX86_BUILTIN_PFCMPGE,
16994 IX86_BUILTIN_PFCMPGT,
16995 IX86_BUILTIN_PFMAX,
16996 IX86_BUILTIN_PFMIN,
16997 IX86_BUILTIN_PFMUL,
16998 IX86_BUILTIN_PFRCP,
16999 IX86_BUILTIN_PFRCPIT1,
17000 IX86_BUILTIN_PFRCPIT2,
17001 IX86_BUILTIN_PFRSQIT1,
17002 IX86_BUILTIN_PFRSQRT,
17003 IX86_BUILTIN_PFSUB,
17004 IX86_BUILTIN_PFSUBR,
17005 IX86_BUILTIN_PI2FD,
17006 IX86_BUILTIN_PMULHRW,
17007
17008 /* 3DNow! Athlon Extensions */
17009 IX86_BUILTIN_PF2IW,
17010 IX86_BUILTIN_PFNACC,
17011 IX86_BUILTIN_PFPNACC,
17012 IX86_BUILTIN_PI2FW,
17013 IX86_BUILTIN_PSWAPDSI,
17014 IX86_BUILTIN_PSWAPDSF,
17015
17016 /* SSE2 */
17017 IX86_BUILTIN_ADDPD,
17018 IX86_BUILTIN_ADDSD,
17019 IX86_BUILTIN_DIVPD,
17020 IX86_BUILTIN_DIVSD,
17021 IX86_BUILTIN_MULPD,
17022 IX86_BUILTIN_MULSD,
17023 IX86_BUILTIN_SUBPD,
17024 IX86_BUILTIN_SUBSD,
17025
17026 IX86_BUILTIN_CMPEQPD,
17027 IX86_BUILTIN_CMPLTPD,
17028 IX86_BUILTIN_CMPLEPD,
17029 IX86_BUILTIN_CMPGTPD,
17030 IX86_BUILTIN_CMPGEPD,
17031 IX86_BUILTIN_CMPNEQPD,
17032 IX86_BUILTIN_CMPNLTPD,
17033 IX86_BUILTIN_CMPNLEPD,
17034 IX86_BUILTIN_CMPNGTPD,
17035 IX86_BUILTIN_CMPNGEPD,
17036 IX86_BUILTIN_CMPORDPD,
17037 IX86_BUILTIN_CMPUNORDPD,
17038 IX86_BUILTIN_CMPEQSD,
17039 IX86_BUILTIN_CMPLTSD,
17040 IX86_BUILTIN_CMPLESD,
17041 IX86_BUILTIN_CMPNEQSD,
17042 IX86_BUILTIN_CMPNLTSD,
17043 IX86_BUILTIN_CMPNLESD,
17044 IX86_BUILTIN_CMPORDSD,
17045 IX86_BUILTIN_CMPUNORDSD,
17046
17047 IX86_BUILTIN_COMIEQSD,
17048 IX86_BUILTIN_COMILTSD,
17049 IX86_BUILTIN_COMILESD,
17050 IX86_BUILTIN_COMIGTSD,
17051 IX86_BUILTIN_COMIGESD,
17052 IX86_BUILTIN_COMINEQSD,
17053 IX86_BUILTIN_UCOMIEQSD,
17054 IX86_BUILTIN_UCOMILTSD,
17055 IX86_BUILTIN_UCOMILESD,
17056 IX86_BUILTIN_UCOMIGTSD,
17057 IX86_BUILTIN_UCOMIGESD,
17058 IX86_BUILTIN_UCOMINEQSD,
17059
17060 IX86_BUILTIN_MAXPD,
17061 IX86_BUILTIN_MAXSD,
17062 IX86_BUILTIN_MINPD,
17063 IX86_BUILTIN_MINSD,
17064
17065 IX86_BUILTIN_ANDPD,
17066 IX86_BUILTIN_ANDNPD,
17067 IX86_BUILTIN_ORPD,
17068 IX86_BUILTIN_XORPD,
17069
17070 IX86_BUILTIN_SQRTPD,
17071 IX86_BUILTIN_SQRTSD,
17072
17073 IX86_BUILTIN_UNPCKHPD,
17074 IX86_BUILTIN_UNPCKLPD,
17075
17076 IX86_BUILTIN_SHUFPD,
17077
17078 IX86_BUILTIN_LOADUPD,
17079 IX86_BUILTIN_STOREUPD,
17080 IX86_BUILTIN_MOVSD,
17081
17082 IX86_BUILTIN_LOADHPD,
17083 IX86_BUILTIN_LOADLPD,
17084
17085 IX86_BUILTIN_CVTDQ2PD,
17086 IX86_BUILTIN_CVTDQ2PS,
17087
17088 IX86_BUILTIN_CVTPD2DQ,
17089 IX86_BUILTIN_CVTPD2PI,
17090 IX86_BUILTIN_CVTPD2PS,
17091 IX86_BUILTIN_CVTTPD2DQ,
17092 IX86_BUILTIN_CVTTPD2PI,
17093
17094 IX86_BUILTIN_CVTPI2PD,
17095 IX86_BUILTIN_CVTSI2SD,
17096 IX86_BUILTIN_CVTSI642SD,
17097
17098 IX86_BUILTIN_CVTSD2SI,
17099 IX86_BUILTIN_CVTSD2SI64,
17100 IX86_BUILTIN_CVTSD2SS,
17101 IX86_BUILTIN_CVTSS2SD,
17102 IX86_BUILTIN_CVTTSD2SI,
17103 IX86_BUILTIN_CVTTSD2SI64,
17104
17105 IX86_BUILTIN_CVTPS2DQ,
17106 IX86_BUILTIN_CVTPS2PD,
17107 IX86_BUILTIN_CVTTPS2DQ,
17108
17109 IX86_BUILTIN_MOVNTI,
17110 IX86_BUILTIN_MOVNTPD,
17111 IX86_BUILTIN_MOVNTDQ,
17112
17113 /* SSE2 MMX */
17114 IX86_BUILTIN_MASKMOVDQU,
17115 IX86_BUILTIN_MOVMSKPD,
17116 IX86_BUILTIN_PMOVMSKB128,
17117
17118 IX86_BUILTIN_PACKSSWB128,
17119 IX86_BUILTIN_PACKSSDW128,
17120 IX86_BUILTIN_PACKUSWB128,
17121
17122 IX86_BUILTIN_PADDB128,
17123 IX86_BUILTIN_PADDW128,
17124 IX86_BUILTIN_PADDD128,
17125 IX86_BUILTIN_PADDQ128,
17126 IX86_BUILTIN_PADDSB128,
17127 IX86_BUILTIN_PADDSW128,
17128 IX86_BUILTIN_PADDUSB128,
17129 IX86_BUILTIN_PADDUSW128,
17130 IX86_BUILTIN_PSUBB128,
17131 IX86_BUILTIN_PSUBW128,
17132 IX86_BUILTIN_PSUBD128,
17133 IX86_BUILTIN_PSUBQ128,
17134 IX86_BUILTIN_PSUBSB128,
17135 IX86_BUILTIN_PSUBSW128,
17136 IX86_BUILTIN_PSUBUSB128,
17137 IX86_BUILTIN_PSUBUSW128,
17138
17139 IX86_BUILTIN_PAND128,
17140 IX86_BUILTIN_PANDN128,
17141 IX86_BUILTIN_POR128,
17142 IX86_BUILTIN_PXOR128,
17143
17144 IX86_BUILTIN_PAVGB128,
17145 IX86_BUILTIN_PAVGW128,
17146
17147 IX86_BUILTIN_PCMPEQB128,
17148 IX86_BUILTIN_PCMPEQW128,
17149 IX86_BUILTIN_PCMPEQD128,
17150 IX86_BUILTIN_PCMPGTB128,
17151 IX86_BUILTIN_PCMPGTW128,
17152 IX86_BUILTIN_PCMPGTD128,
17153
17154 IX86_BUILTIN_PMADDWD128,
17155
17156 IX86_BUILTIN_PMAXSW128,
17157 IX86_BUILTIN_PMAXUB128,
17158 IX86_BUILTIN_PMINSW128,
17159 IX86_BUILTIN_PMINUB128,
17160
17161 IX86_BUILTIN_PMULUDQ,
17162 IX86_BUILTIN_PMULUDQ128,
17163 IX86_BUILTIN_PMULHUW128,
17164 IX86_BUILTIN_PMULHW128,
17165 IX86_BUILTIN_PMULLW128,
17166
17167 IX86_BUILTIN_PSADBW128,
17168 IX86_BUILTIN_PSHUFHW,
17169 IX86_BUILTIN_PSHUFLW,
17170 IX86_BUILTIN_PSHUFD,
17171
17172 IX86_BUILTIN_PSLLDQI128,
17173 IX86_BUILTIN_PSLLWI128,
17174 IX86_BUILTIN_PSLLDI128,
17175 IX86_BUILTIN_PSLLQI128,
17176 IX86_BUILTIN_PSRAWI128,
17177 IX86_BUILTIN_PSRADI128,
17178 IX86_BUILTIN_PSRLDQI128,
17179 IX86_BUILTIN_PSRLWI128,
17180 IX86_BUILTIN_PSRLDI128,
17181 IX86_BUILTIN_PSRLQI128,
17182
17183 IX86_BUILTIN_PSLLDQ128,
17184 IX86_BUILTIN_PSLLW128,
17185 IX86_BUILTIN_PSLLD128,
17186 IX86_BUILTIN_PSLLQ128,
17187 IX86_BUILTIN_PSRAW128,
17188 IX86_BUILTIN_PSRAD128,
17189 IX86_BUILTIN_PSRLW128,
17190 IX86_BUILTIN_PSRLD128,
17191 IX86_BUILTIN_PSRLQ128,
17192
17193 IX86_BUILTIN_PUNPCKHBW128,
17194 IX86_BUILTIN_PUNPCKHWD128,
17195 IX86_BUILTIN_PUNPCKHDQ128,
17196 IX86_BUILTIN_PUNPCKHQDQ128,
17197 IX86_BUILTIN_PUNPCKLBW128,
17198 IX86_BUILTIN_PUNPCKLWD128,
17199 IX86_BUILTIN_PUNPCKLDQ128,
17200 IX86_BUILTIN_PUNPCKLQDQ128,
17201
17202 IX86_BUILTIN_CLFLUSH,
17203 IX86_BUILTIN_MFENCE,
17204 IX86_BUILTIN_LFENCE,
17205
17206 /* Prescott New Instructions. */
17207 IX86_BUILTIN_ADDSUBPS,
17208 IX86_BUILTIN_HADDPS,
17209 IX86_BUILTIN_HSUBPS,
17210 IX86_BUILTIN_MOVSHDUP,
17211 IX86_BUILTIN_MOVSLDUP,
17212 IX86_BUILTIN_ADDSUBPD,
17213 IX86_BUILTIN_HADDPD,
17214 IX86_BUILTIN_HSUBPD,
17215 IX86_BUILTIN_LDDQU,
17216
17217 IX86_BUILTIN_MONITOR,
17218 IX86_BUILTIN_MWAIT,
17219
17220 /* SSSE3. */
17221 IX86_BUILTIN_PHADDW,
17222 IX86_BUILTIN_PHADDD,
17223 IX86_BUILTIN_PHADDSW,
17224 IX86_BUILTIN_PHSUBW,
17225 IX86_BUILTIN_PHSUBD,
17226 IX86_BUILTIN_PHSUBSW,
17227 IX86_BUILTIN_PMADDUBSW,
17228 IX86_BUILTIN_PMULHRSW,
17229 IX86_BUILTIN_PSHUFB,
17230 IX86_BUILTIN_PSIGNB,
17231 IX86_BUILTIN_PSIGNW,
17232 IX86_BUILTIN_PSIGND,
17233 IX86_BUILTIN_PALIGNR,
17234 IX86_BUILTIN_PABSB,
17235 IX86_BUILTIN_PABSW,
17236 IX86_BUILTIN_PABSD,
17237
17238 IX86_BUILTIN_PHADDW128,
17239 IX86_BUILTIN_PHADDD128,
17240 IX86_BUILTIN_PHADDSW128,
17241 IX86_BUILTIN_PHSUBW128,
17242 IX86_BUILTIN_PHSUBD128,
17243 IX86_BUILTIN_PHSUBSW128,
17244 IX86_BUILTIN_PMADDUBSW128,
17245 IX86_BUILTIN_PMULHRSW128,
17246 IX86_BUILTIN_PSHUFB128,
17247 IX86_BUILTIN_PSIGNB128,
17248 IX86_BUILTIN_PSIGNW128,
17249 IX86_BUILTIN_PSIGND128,
17250 IX86_BUILTIN_PALIGNR128,
17251 IX86_BUILTIN_PABSB128,
17252 IX86_BUILTIN_PABSW128,
17253 IX86_BUILTIN_PABSD128,
17254
17255 /* AMDFAM10 - SSE4A New Instructions. */
17256 IX86_BUILTIN_MOVNTSD,
17257 IX86_BUILTIN_MOVNTSS,
17258 IX86_BUILTIN_EXTRQI,
17259 IX86_BUILTIN_EXTRQ,
17260 IX86_BUILTIN_INSERTQI,
17261 IX86_BUILTIN_INSERTQ,
17262
17263 /* SSE4.1. */
17264 IX86_BUILTIN_BLENDPD,
17265 IX86_BUILTIN_BLENDPS,
17266 IX86_BUILTIN_BLENDVPD,
17267 IX86_BUILTIN_BLENDVPS,
17268 IX86_BUILTIN_PBLENDVB128,
17269 IX86_BUILTIN_PBLENDW128,
17270
17271 IX86_BUILTIN_DPPD,
17272 IX86_BUILTIN_DPPS,
17273
17274 IX86_BUILTIN_INSERTPS128,
17275
17276 IX86_BUILTIN_MOVNTDQA,
17277 IX86_BUILTIN_MPSADBW128,
17278 IX86_BUILTIN_PACKUSDW128,
17279 IX86_BUILTIN_PCMPEQQ,
17280 IX86_BUILTIN_PHMINPOSUW128,
17281
17282 IX86_BUILTIN_PMAXSB128,
17283 IX86_BUILTIN_PMAXSD128,
17284 IX86_BUILTIN_PMAXUD128,
17285 IX86_BUILTIN_PMAXUW128,
17286
17287 IX86_BUILTIN_PMINSB128,
17288 IX86_BUILTIN_PMINSD128,
17289 IX86_BUILTIN_PMINUD128,
17290 IX86_BUILTIN_PMINUW128,
17291
17292 IX86_BUILTIN_PMOVSXBW128,
17293 IX86_BUILTIN_PMOVSXBD128,
17294 IX86_BUILTIN_PMOVSXBQ128,
17295 IX86_BUILTIN_PMOVSXWD128,
17296 IX86_BUILTIN_PMOVSXWQ128,
17297 IX86_BUILTIN_PMOVSXDQ128,
17298
17299 IX86_BUILTIN_PMOVZXBW128,
17300 IX86_BUILTIN_PMOVZXBD128,
17301 IX86_BUILTIN_PMOVZXBQ128,
17302 IX86_BUILTIN_PMOVZXWD128,
17303 IX86_BUILTIN_PMOVZXWQ128,
17304 IX86_BUILTIN_PMOVZXDQ128,
17305
17306 IX86_BUILTIN_PMULDQ128,
17307 IX86_BUILTIN_PMULLD128,
17308
17309 IX86_BUILTIN_ROUNDPD,
17310 IX86_BUILTIN_ROUNDPS,
17311 IX86_BUILTIN_ROUNDSD,
17312 IX86_BUILTIN_ROUNDSS,
17313
17314 IX86_BUILTIN_PTESTZ,
17315 IX86_BUILTIN_PTESTC,
17316 IX86_BUILTIN_PTESTNZC,
17317
17318 IX86_BUILTIN_VEC_INIT_V2SI,
17319 IX86_BUILTIN_VEC_INIT_V4HI,
17320 IX86_BUILTIN_VEC_INIT_V8QI,
17321 IX86_BUILTIN_VEC_EXT_V2DF,
17322 IX86_BUILTIN_VEC_EXT_V2DI,
17323 IX86_BUILTIN_VEC_EXT_V4SF,
17324 IX86_BUILTIN_VEC_EXT_V4SI,
17325 IX86_BUILTIN_VEC_EXT_V8HI,
17326 IX86_BUILTIN_VEC_EXT_V2SI,
17327 IX86_BUILTIN_VEC_EXT_V4HI,
17328 IX86_BUILTIN_VEC_EXT_V16QI,
17329 IX86_BUILTIN_VEC_SET_V2DI,
17330 IX86_BUILTIN_VEC_SET_V4SF,
17331 IX86_BUILTIN_VEC_SET_V4SI,
17332 IX86_BUILTIN_VEC_SET_V8HI,
17333 IX86_BUILTIN_VEC_SET_V4HI,
17334 IX86_BUILTIN_VEC_SET_V16QI,
17335
17336 IX86_BUILTIN_VEC_PACK_SFIX,
17337
17338 /* SSE4.2. */
17339 IX86_BUILTIN_CRC32QI,
17340 IX86_BUILTIN_CRC32HI,
17341 IX86_BUILTIN_CRC32SI,
17342 IX86_BUILTIN_CRC32DI,
17343
17344 IX86_BUILTIN_PCMPESTRI128,
17345 IX86_BUILTIN_PCMPESTRM128,
17346 IX86_BUILTIN_PCMPESTRA128,
17347 IX86_BUILTIN_PCMPESTRC128,
17348 IX86_BUILTIN_PCMPESTRO128,
17349 IX86_BUILTIN_PCMPESTRS128,
17350 IX86_BUILTIN_PCMPESTRZ128,
17351 IX86_BUILTIN_PCMPISTRI128,
17352 IX86_BUILTIN_PCMPISTRM128,
17353 IX86_BUILTIN_PCMPISTRA128,
17354 IX86_BUILTIN_PCMPISTRC128,
17355 IX86_BUILTIN_PCMPISTRO128,
17356 IX86_BUILTIN_PCMPISTRS128,
17357 IX86_BUILTIN_PCMPISTRZ128,
17358
17359 IX86_BUILTIN_PCMPGTQ,
17360
17361 /* TFmode support builtins. */
17362 IX86_BUILTIN_INFQ,
17363 IX86_BUILTIN_FABSQ,
17364 IX86_BUILTIN_COPYSIGNQ,
17365
17366 /* SSE5 instructions */
17367 IX86_BUILTIN_FMADDSS,
17368 IX86_BUILTIN_FMADDSD,
17369 IX86_BUILTIN_FMADDPS,
17370 IX86_BUILTIN_FMADDPD,
17371 IX86_BUILTIN_FMSUBSS,
17372 IX86_BUILTIN_FMSUBSD,
17373 IX86_BUILTIN_FMSUBPS,
17374 IX86_BUILTIN_FMSUBPD,
17375 IX86_BUILTIN_FNMADDSS,
17376 IX86_BUILTIN_FNMADDSD,
17377 IX86_BUILTIN_FNMADDPS,
17378 IX86_BUILTIN_FNMADDPD,
17379 IX86_BUILTIN_FNMSUBSS,
17380 IX86_BUILTIN_FNMSUBSD,
17381 IX86_BUILTIN_FNMSUBPS,
17382 IX86_BUILTIN_FNMSUBPD,
17383 IX86_BUILTIN_PCMOV_V2DI,
17384 IX86_BUILTIN_PCMOV_V4SI,
17385 IX86_BUILTIN_PCMOV_V8HI,
17386 IX86_BUILTIN_PCMOV_V16QI,
17387 IX86_BUILTIN_PCMOV_V4SF,
17388 IX86_BUILTIN_PCMOV_V2DF,
17389 IX86_BUILTIN_PPERM,
17390 IX86_BUILTIN_PERMPS,
17391 IX86_BUILTIN_PERMPD,
17392 IX86_BUILTIN_PMACSSWW,
17393 IX86_BUILTIN_PMACSWW,
17394 IX86_BUILTIN_PMACSSWD,
17395 IX86_BUILTIN_PMACSWD,
17396 IX86_BUILTIN_PMACSSDD,
17397 IX86_BUILTIN_PMACSDD,
17398 IX86_BUILTIN_PMACSSDQL,
17399 IX86_BUILTIN_PMACSSDQH,
17400 IX86_BUILTIN_PMACSDQL,
17401 IX86_BUILTIN_PMACSDQH,
17402 IX86_BUILTIN_PMADCSSWD,
17403 IX86_BUILTIN_PMADCSWD,
17404 IX86_BUILTIN_PHADDBW,
17405 IX86_BUILTIN_PHADDBD,
17406 IX86_BUILTIN_PHADDBQ,
17407 IX86_BUILTIN_PHADDWD,
17408 IX86_BUILTIN_PHADDWQ,
17409 IX86_BUILTIN_PHADDDQ,
17410 IX86_BUILTIN_PHADDUBW,
17411 IX86_BUILTIN_PHADDUBD,
17412 IX86_BUILTIN_PHADDUBQ,
17413 IX86_BUILTIN_PHADDUWD,
17414 IX86_BUILTIN_PHADDUWQ,
17415 IX86_BUILTIN_PHADDUDQ,
17416 IX86_BUILTIN_PHSUBBW,
17417 IX86_BUILTIN_PHSUBWD,
17418 IX86_BUILTIN_PHSUBDQ,
17419 IX86_BUILTIN_PROTB,
17420 IX86_BUILTIN_PROTW,
17421 IX86_BUILTIN_PROTD,
17422 IX86_BUILTIN_PROTQ,
17423 IX86_BUILTIN_PROTB_IMM,
17424 IX86_BUILTIN_PROTW_IMM,
17425 IX86_BUILTIN_PROTD_IMM,
17426 IX86_BUILTIN_PROTQ_IMM,
17427 IX86_BUILTIN_PSHLB,
17428 IX86_BUILTIN_PSHLW,
17429 IX86_BUILTIN_PSHLD,
17430 IX86_BUILTIN_PSHLQ,
17431 IX86_BUILTIN_PSHAB,
17432 IX86_BUILTIN_PSHAW,
17433 IX86_BUILTIN_PSHAD,
17434 IX86_BUILTIN_PSHAQ,
17435 IX86_BUILTIN_FRCZSS,
17436 IX86_BUILTIN_FRCZSD,
17437 IX86_BUILTIN_FRCZPS,
17438 IX86_BUILTIN_FRCZPD,
17439 IX86_BUILTIN_CVTPH2PS,
17440 IX86_BUILTIN_CVTPS2PH,
17441
17442 IX86_BUILTIN_COMEQSS,
17443 IX86_BUILTIN_COMNESS,
17444 IX86_BUILTIN_COMLTSS,
17445 IX86_BUILTIN_COMLESS,
17446 IX86_BUILTIN_COMGTSS,
17447 IX86_BUILTIN_COMGESS,
17448 IX86_BUILTIN_COMUEQSS,
17449 IX86_BUILTIN_COMUNESS,
17450 IX86_BUILTIN_COMULTSS,
17451 IX86_BUILTIN_COMULESS,
17452 IX86_BUILTIN_COMUGTSS,
17453 IX86_BUILTIN_COMUGESS,
17454 IX86_BUILTIN_COMORDSS,
17455 IX86_BUILTIN_COMUNORDSS,
17456 IX86_BUILTIN_COMFALSESS,
17457 IX86_BUILTIN_COMTRUESS,
17458
17459 IX86_BUILTIN_COMEQSD,
17460 IX86_BUILTIN_COMNESD,
17461 IX86_BUILTIN_COMLTSD,
17462 IX86_BUILTIN_COMLESD,
17463 IX86_BUILTIN_COMGTSD,
17464 IX86_BUILTIN_COMGESD,
17465 IX86_BUILTIN_COMUEQSD,
17466 IX86_BUILTIN_COMUNESD,
17467 IX86_BUILTIN_COMULTSD,
17468 IX86_BUILTIN_COMULESD,
17469 IX86_BUILTIN_COMUGTSD,
17470 IX86_BUILTIN_COMUGESD,
17471 IX86_BUILTIN_COMORDSD,
17472 IX86_BUILTIN_COMUNORDSD,
17473 IX86_BUILTIN_COMFALSESD,
17474 IX86_BUILTIN_COMTRUESD,
17475
17476 IX86_BUILTIN_COMEQPS,
17477 IX86_BUILTIN_COMNEPS,
17478 IX86_BUILTIN_COMLTPS,
17479 IX86_BUILTIN_COMLEPS,
17480 IX86_BUILTIN_COMGTPS,
17481 IX86_BUILTIN_COMGEPS,
17482 IX86_BUILTIN_COMUEQPS,
17483 IX86_BUILTIN_COMUNEPS,
17484 IX86_BUILTIN_COMULTPS,
17485 IX86_BUILTIN_COMULEPS,
17486 IX86_BUILTIN_COMUGTPS,
17487 IX86_BUILTIN_COMUGEPS,
17488 IX86_BUILTIN_COMORDPS,
17489 IX86_BUILTIN_COMUNORDPS,
17490 IX86_BUILTIN_COMFALSEPS,
17491 IX86_BUILTIN_COMTRUEPS,
17492
17493 IX86_BUILTIN_COMEQPD,
17494 IX86_BUILTIN_COMNEPD,
17495 IX86_BUILTIN_COMLTPD,
17496 IX86_BUILTIN_COMLEPD,
17497 IX86_BUILTIN_COMGTPD,
17498 IX86_BUILTIN_COMGEPD,
17499 IX86_BUILTIN_COMUEQPD,
17500 IX86_BUILTIN_COMUNEPD,
17501 IX86_BUILTIN_COMULTPD,
17502 IX86_BUILTIN_COMULEPD,
17503 IX86_BUILTIN_COMUGTPD,
17504 IX86_BUILTIN_COMUGEPD,
17505 IX86_BUILTIN_COMORDPD,
17506 IX86_BUILTIN_COMUNORDPD,
17507 IX86_BUILTIN_COMFALSEPD,
17508 IX86_BUILTIN_COMTRUEPD,
17509
17510 IX86_BUILTIN_PCOMEQUB,
17511 IX86_BUILTIN_PCOMNEUB,
17512 IX86_BUILTIN_PCOMLTUB,
17513 IX86_BUILTIN_PCOMLEUB,
17514 IX86_BUILTIN_PCOMGTUB,
17515 IX86_BUILTIN_PCOMGEUB,
17516 IX86_BUILTIN_PCOMFALSEUB,
17517 IX86_BUILTIN_PCOMTRUEUB,
17518 IX86_BUILTIN_PCOMEQUW,
17519 IX86_BUILTIN_PCOMNEUW,
17520 IX86_BUILTIN_PCOMLTUW,
17521 IX86_BUILTIN_PCOMLEUW,
17522 IX86_BUILTIN_PCOMGTUW,
17523 IX86_BUILTIN_PCOMGEUW,
17524 IX86_BUILTIN_PCOMFALSEUW,
17525 IX86_BUILTIN_PCOMTRUEUW,
17526 IX86_BUILTIN_PCOMEQUD,
17527 IX86_BUILTIN_PCOMNEUD,
17528 IX86_BUILTIN_PCOMLTUD,
17529 IX86_BUILTIN_PCOMLEUD,
17530 IX86_BUILTIN_PCOMGTUD,
17531 IX86_BUILTIN_PCOMGEUD,
17532 IX86_BUILTIN_PCOMFALSEUD,
17533 IX86_BUILTIN_PCOMTRUEUD,
17534 IX86_BUILTIN_PCOMEQUQ,
17535 IX86_BUILTIN_PCOMNEUQ,
17536 IX86_BUILTIN_PCOMLTUQ,
17537 IX86_BUILTIN_PCOMLEUQ,
17538 IX86_BUILTIN_PCOMGTUQ,
17539 IX86_BUILTIN_PCOMGEUQ,
17540 IX86_BUILTIN_PCOMFALSEUQ,
17541 IX86_BUILTIN_PCOMTRUEUQ,
17542
17543 IX86_BUILTIN_PCOMEQB,
17544 IX86_BUILTIN_PCOMNEB,
17545 IX86_BUILTIN_PCOMLTB,
17546 IX86_BUILTIN_PCOMLEB,
17547 IX86_BUILTIN_PCOMGTB,
17548 IX86_BUILTIN_PCOMGEB,
17549 IX86_BUILTIN_PCOMFALSEB,
17550 IX86_BUILTIN_PCOMTRUEB,
17551 IX86_BUILTIN_PCOMEQW,
17552 IX86_BUILTIN_PCOMNEW,
17553 IX86_BUILTIN_PCOMLTW,
17554 IX86_BUILTIN_PCOMLEW,
17555 IX86_BUILTIN_PCOMGTW,
17556 IX86_BUILTIN_PCOMGEW,
17557 IX86_BUILTIN_PCOMFALSEW,
17558 IX86_BUILTIN_PCOMTRUEW,
17559 IX86_BUILTIN_PCOMEQD,
17560 IX86_BUILTIN_PCOMNED,
17561 IX86_BUILTIN_PCOMLTD,
17562 IX86_BUILTIN_PCOMLED,
17563 IX86_BUILTIN_PCOMGTD,
17564 IX86_BUILTIN_PCOMGED,
17565 IX86_BUILTIN_PCOMFALSED,
17566 IX86_BUILTIN_PCOMTRUED,
17567 IX86_BUILTIN_PCOMEQQ,
17568 IX86_BUILTIN_PCOMNEQ,
17569 IX86_BUILTIN_PCOMLTQ,
17570 IX86_BUILTIN_PCOMLEQ,
17571 IX86_BUILTIN_PCOMGTQ,
17572 IX86_BUILTIN_PCOMGEQ,
17573 IX86_BUILTIN_PCOMFALSEQ,
17574 IX86_BUILTIN_PCOMTRUEQ,
17575
17576 IX86_BUILTIN_MAX
17577 };
17578
17579 /* Table for the ix86 builtin decls. */
17580 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17581
17582 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
17583 only if MASK is enabled in ix86_isa_flags. Stores the function decl
17584 in the ix86_builtins array.
17585 Returns the function decl, or NULL_TREE if the builtin was not added. */
17586
17587 static inline tree
17588 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17589 {
17590 tree decl = NULL_TREE;
17591
17592 if (mask & ix86_isa_flags
17593 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17594 {
17595 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17596 NULL, NULL_TREE);
17597 ix86_builtins[(int) code] = decl;
17598 }
17599
17600 return decl;
17601 }
17602
17603 /* Like def_builtin, but also marks the function decl "const". */
17604
17605 static inline tree
17606 def_builtin_const (int mask, const char *name, tree type,
17607 enum ix86_builtins code)
17608 {
17609 tree decl = def_builtin (mask, name, type, code);
17610 if (decl)
17611 TREE_READONLY (decl) = 1;
17612 return decl;
17613 }
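/* Illustrative use (the actual registrations appear later in this file):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
                        v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   registers the builtin only when SSE is enabled in ix86_isa_flags and
   records its decl so the expanders can look it up by code.  */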
17614
17615 /* Bits for builtin_description.flag. */
17616
17617 /* Set when we don't support the comparison natively, and should
17618 swap the comparison operands in order to support it. */
17619 #define BUILTIN_DESC_SWAP_OPERANDS 1
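/* For example, there is no native "compare greater" SSE pattern, so
   __builtin_ia32_cmpgtps is listed below as LT with
   BUILTIN_DESC_SWAP_OPERANDS: a > b is emitted as b < a.  */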
17620
17621 struct builtin_description
17622 {
17623 const unsigned int mask;
17624 const enum insn_code icode;
17625 const char *const name;
17626 const enum ix86_builtins code;
17627 const enum rtx_code comparison;
17628 const int flag;
17629 };
17630
17631 static const struct builtin_description bdesc_comi[] =
17632 {
17633 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17634 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17635 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17657 };
17658
17659 static const struct builtin_description bdesc_ptest[] =
17660 {
17661 /* SSE4.1 */
17662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17664 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17665 };
17666
17667 static const struct builtin_description bdesc_pcmpestr[] =
17668 {
17669 /* SSE4.2 */
17670 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17671 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17672 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17673 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17674 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17675 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17676 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17677 };
17678
17679 static const struct builtin_description bdesc_pcmpistr[] =
17680 {
17681 /* SSE4.2 */
17682 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17683 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17684 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17685 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17686 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17687 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17688 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17689 };
17690
17691 static const struct builtin_description bdesc_crc32[] =
17692 {
17693 /* SSE4.2 */
17694 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17695 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17696 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17697 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17698 };
17699
17700 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0. */
17701 static const struct builtin_description bdesc_sse_3arg[] =
17702 {
17703 /* SSE4.1 */
17704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17714 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17715 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17716 };
17717
17718 static const struct builtin_description bdesc_2arg[] =
17719 {
17720 /* SSE */
17721 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17722 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17723 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17724 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17725 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17726 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17727 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17729
17730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17733 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17737 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17752
17753 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17754 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17755 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17756 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17757
17758 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17760 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17761 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17762
17763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17768
17769 /* MMX */
17770 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17771 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17773 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17774 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17777 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17778
17779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17785 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17787
17788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17790 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17791
17792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17796
17797 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17798 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17799
17800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17805 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17806
17807 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17808 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17809 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17810 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17811
17812 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17818
17819 /* Special. */
17820 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17823
17824 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17825 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17826 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17827
17828 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17834
17835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17837 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17839 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17840 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17841
17842 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17843 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17844 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17845 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17846
17847 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17848 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17849
17850 /* SSE2 */
17851 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17852 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17853 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17854 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17855 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17857 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17859
17860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17880
17881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17882 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17885
17886 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17888 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17889 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17890
17891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17894
17895 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17896
17897 /* SSE2 MMX */
17898 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17899 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17900 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17901 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17902 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17903 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17904 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17905 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17906
17907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17915
17916 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17917 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17918
17919 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17921 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17922 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17923
17924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17926
17927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17933
17934 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17935 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17936 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17938
17939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17947
17948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17951
17952 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17954
17955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17957
17958 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17960 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17961
17962 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17964 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17965
17966 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17967 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17968
17969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17970
17971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17972 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17975
17976 /* SSE3 */
17977 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17978 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17979 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17980 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17981 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17982 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17983
17984 /* SSSE3 */
17985 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17986 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17987 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17990 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17992 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17994 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17995 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17996 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17997 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17998 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17999 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18000 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18001 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18002 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18003 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18004 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18005 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18006 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18007 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18009
18010 /* SSE4.1 */
18011 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18012 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18013 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18014 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18015 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18016 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18017 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18020 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18021 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18023
18024 /* SSE4.2 */
18025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18026 };
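/* A note on the comparison entries above (inferred from the table itself,
   not a new invariant): when the comparison field is not UNKNOWN it is the
   rtx comparison code used while expanding the builtin, and
   BUILTIN_DESC_SWAP_OPERANDS marks predicates that the hardware only
   encodes in one direction.  For example, CMPPD has no direct
   "greater than" form, so

     __builtin_ia32_cmpgtpd (a, b)

   is expected to be emitted as a less-than compare with its operands
   swapped, i.e. as b < a.  */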
18027
18028 static const struct builtin_description bdesc_1arg[] =
18029 {
18030 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18032
18033 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18036
18037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18039 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18042 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18043
18044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18046
18047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18048
18049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18051
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18057
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18059
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18062 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18064
18065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18068
18069 /* SSE3 */
18070 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18071 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18072
18073 /* SSSE3 */
18074 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18077 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18078 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18080
18081 /* SSE4.1 */
18082 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18083 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18084 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18087 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18090 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18095
18096 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg;
    see the usage sketch after this table. */
18097 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18099 };
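/* Usage sketch for the "fake 1 arg" entries above, assuming the standard
   SSE4.1 wrappers in smmintrin.h: the ROUNDPD/ROUNDPS builtins carry the
   8-bit rounding-control immediate as their second argument, roughly

     #define _mm_floor_pd(X) \
       ((__m128d) __builtin_ia32_roundpd ((__v2df)(X), 0x01))

   where 0x01 selects rounding toward negative infinity.  The expander is
   expected to verify that this operand is a compile-time constant that
   fits in 8 bits.  */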
18100
18101 /* SSE5 */
18102 enum multi_arg_type {
18103 MULTI_ARG_UNKNOWN,
18104 MULTI_ARG_3_SF,
18105 MULTI_ARG_3_DF,
18106 MULTI_ARG_3_DI,
18107 MULTI_ARG_3_SI,
18108 MULTI_ARG_3_SI_DI,
18109 MULTI_ARG_3_HI,
18110 MULTI_ARG_3_HI_SI,
18111 MULTI_ARG_3_QI,
18112 MULTI_ARG_3_PERMPS,
18113 MULTI_ARG_3_PERMPD,
18114 MULTI_ARG_2_SF,
18115 MULTI_ARG_2_DF,
18116 MULTI_ARG_2_DI,
18117 MULTI_ARG_2_SI,
18118 MULTI_ARG_2_HI,
18119 MULTI_ARG_2_QI,
18120 MULTI_ARG_2_DI_IMM,
18121 MULTI_ARG_2_SI_IMM,
18122 MULTI_ARG_2_HI_IMM,
18123 MULTI_ARG_2_QI_IMM,
18124 MULTI_ARG_2_SF_CMP,
18125 MULTI_ARG_2_DF_CMP,
18126 MULTI_ARG_2_DI_CMP,
18127 MULTI_ARG_2_SI_CMP,
18128 MULTI_ARG_2_HI_CMP,
18129 MULTI_ARG_2_QI_CMP,
18130 MULTI_ARG_2_DI_TF,
18131 MULTI_ARG_2_SI_TF,
18132 MULTI_ARG_2_HI_TF,
18133 MULTI_ARG_2_QI_TF,
18134 MULTI_ARG_2_SF_TF,
18135 MULTI_ARG_2_DF_TF,
18136 MULTI_ARG_1_SF,
18137 MULTI_ARG_1_DF,
18138 MULTI_ARG_1_DI,
18139 MULTI_ARG_1_SI,
18140 MULTI_ARG_1_HI,
18141 MULTI_ARG_1_QI,
18142 MULTI_ARG_1_SI_DI,
18143 MULTI_ARG_1_HI_DI,
18144 MULTI_ARG_1_HI_SI,
18145 MULTI_ARG_1_QI_DI,
18146 MULTI_ARG_1_QI_SI,
18147 MULTI_ARG_1_QI_HI,
18148 MULTI_ARG_1_PH2PS,
18149 MULTI_ARG_1_PS2PH
18150 };
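/* Rough decoding of the MULTI_ARG_* codes above, inferred from how the
   table below uses them rather than from a formal specification: the
   leading digit is the number of input operands and the suffix names the
   vector element mode, so MULTI_ARG_3_SF is a three-operand V4SFmode
   operation while MULTI_ARG_1_QI_HI is a one-operand form taking a
   V16QImode input and producing a V8HImode result.  The _IMM, _CMP and
   _TF variants respectively mark forms whose last operand is an
   immediate, forms whose comparison code is taken from the table entry,
   and the COM/PCOM "always false"/"always true" test forms.  These codes
   are consumed by the SSE5 multi-arg expander later in this file.  */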
18151
18152 static const struct builtin_description bdesc_multi_arg[] =
18153 {
18154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18196 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18197 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18198 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18199 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18229
18230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18246
18247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18263
18264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18280
18281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18297
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18305
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18313
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18321
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18329
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18337
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18345
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18353
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18361
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18370
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18379
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18388 };
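/* Note inferred from the table above: several builtin spellings map to a
   single IX86_BUILTIN_* code, e.g. both __builtin_ia32_comness and
   __builtin_ia32_comneqss use IX86_BUILTIN_COMNESS, so the alternative
   names act as aliases for the same operation.  */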
18389
18390 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18391 is zero. Otherwise, if TARGET_SSE is not set, only the MMX builtins
18392 are defined. */
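/* As a rough illustration only (a sketch of the registration pattern, not
   a definitive statement of def_builtin's semantics): each bdesc_* table
   above is walked and every named entry is registered under its ISA mask,
   along the lines of

     for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
       if (d->name)
         def_builtin_const (d->mask, d->name, <type matching d->icode>,
                            d->code);

   so a builtin tagged OPTION_MASK_ISA_SSE2, for instance, only becomes
   available when SSE2 is among the enabled ISAs.  */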
18393 static void
18394 ix86_init_mmx_sse_builtins (void)
18395 {
18396 const struct builtin_description * d;
18397 size_t i;
18398
18399 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18400 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18401 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18402 tree V2DI_type_node
18403 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18404 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18405 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18406 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18407 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18408 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18409 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18410
18411 tree pchar_type_node = build_pointer_type (char_type_node);
18412 tree pcchar_type_node = build_pointer_type (
18413 build_type_variant (char_type_node, 1, 0));
18414 tree pfloat_type_node = build_pointer_type (float_type_node);
18415 tree pcfloat_type_node = build_pointer_type (
18416 build_type_variant (float_type_node, 1, 0));
18417 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18418 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18419 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18420
18421 /* Comparisons. */
18422 tree int_ftype_v4sf_v4sf
18423 = build_function_type_list (integer_type_node,
18424 V4SF_type_node, V4SF_type_node, NULL_TREE);
18425 tree v4si_ftype_v4sf_v4sf
18426 = build_function_type_list (V4SI_type_node,
18427 V4SF_type_node, V4SF_type_node, NULL_TREE);
18428 /* MMX/SSE/integer conversions. */
18429 tree int_ftype_v4sf
18430 = build_function_type_list (integer_type_node,
18431 V4SF_type_node, NULL_TREE);
18432 tree int64_ftype_v4sf
18433 = build_function_type_list (long_long_integer_type_node,
18434 V4SF_type_node, NULL_TREE);
18435 tree int_ftype_v8qi
18436 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18437 tree v4sf_ftype_v4sf_int
18438 = build_function_type_list (V4SF_type_node,
18439 V4SF_type_node, integer_type_node, NULL_TREE);
18440 tree v4sf_ftype_v4sf_int64
18441 = build_function_type_list (V4SF_type_node,
18442 V4SF_type_node, long_long_integer_type_node,
18443 NULL_TREE);
18444 tree v4sf_ftype_v4sf_v2si
18445 = build_function_type_list (V4SF_type_node,
18446 V4SF_type_node, V2SI_type_node, NULL_TREE);
18447
18448 /* Miscellaneous. */
18449 tree v8qi_ftype_v4hi_v4hi
18450 = build_function_type_list (V8QI_type_node,
18451 V4HI_type_node, V4HI_type_node, NULL_TREE);
18452 tree v4hi_ftype_v2si_v2si
18453 = build_function_type_list (V4HI_type_node,
18454 V2SI_type_node, V2SI_type_node, NULL_TREE);
18455 tree v4sf_ftype_v4sf_v4sf_int
18456 = build_function_type_list (V4SF_type_node,
18457 V4SF_type_node, V4SF_type_node,
18458 integer_type_node, NULL_TREE);
18459 tree v2si_ftype_v4hi_v4hi
18460 = build_function_type_list (V2SI_type_node,
18461 V4HI_type_node, V4HI_type_node, NULL_TREE);
18462 tree v4hi_ftype_v4hi_int
18463 = build_function_type_list (V4HI_type_node,
18464 V4HI_type_node, integer_type_node, NULL_TREE);
18465 tree v4hi_ftype_v4hi_di
18466 = build_function_type_list (V4HI_type_node,
18467 V4HI_type_node, long_long_unsigned_type_node,
18468 NULL_TREE);
18469 tree v2si_ftype_v2si_di
18470 = build_function_type_list (V2SI_type_node,
18471 V2SI_type_node, long_long_unsigned_type_node,
18472 NULL_TREE);
18473 tree void_ftype_void
18474 = build_function_type (void_type_node, void_list_node);
18475 tree void_ftype_unsigned
18476 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18477 tree void_ftype_unsigned_unsigned
18478 = build_function_type_list (void_type_node, unsigned_type_node,
18479 unsigned_type_node, NULL_TREE);
18480 tree void_ftype_pcvoid_unsigned_unsigned
18481 = build_function_type_list (void_type_node, const_ptr_type_node,
18482 unsigned_type_node, unsigned_type_node,
18483 NULL_TREE);
18484 tree unsigned_ftype_void
18485 = build_function_type (unsigned_type_node, void_list_node);
18486 tree v2si_ftype_v4sf
18487 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18488 /* Loads/stores. */
18489 tree void_ftype_v8qi_v8qi_pchar
18490 = build_function_type_list (void_type_node,
18491 V8QI_type_node, V8QI_type_node,
18492 pchar_type_node, NULL_TREE);
18493 tree v4sf_ftype_pcfloat
18494 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18495 /* @@@ the type is bogus */
18496 tree v4sf_ftype_v4sf_pv2si
18497 = build_function_type_list (V4SF_type_node,
18498 V4SF_type_node, pv2si_type_node, NULL_TREE);
18499 tree void_ftype_pv2si_v4sf
18500 = build_function_type_list (void_type_node,
18501 pv2si_type_node, V4SF_type_node, NULL_TREE);
18502 tree void_ftype_pfloat_v4sf
18503 = build_function_type_list (void_type_node,
18504 pfloat_type_node, V4SF_type_node, NULL_TREE);
18505 tree void_ftype_pdi_di
18506 = build_function_type_list (void_type_node,
18507 pdi_type_node, long_long_unsigned_type_node,
18508 NULL_TREE);
18509 tree void_ftype_pv2di_v2di
18510 = build_function_type_list (void_type_node,
18511 pv2di_type_node, V2DI_type_node, NULL_TREE);
18512 /* Normal vector unops. */
18513 tree v4sf_ftype_v4sf
18514 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18515 tree v16qi_ftype_v16qi
18516 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18517 tree v8hi_ftype_v8hi
18518 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18519 tree v4si_ftype_v4si
18520 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18521 tree v8qi_ftype_v8qi
18522 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18523 tree v4hi_ftype_v4hi
18524 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18525
18526 /* Normal vector binops. */
18527 tree v4sf_ftype_v4sf_v4sf
18528 = build_function_type_list (V4SF_type_node,
18529 V4SF_type_node, V4SF_type_node, NULL_TREE);
18530 tree v8qi_ftype_v8qi_v8qi
18531 = build_function_type_list (V8QI_type_node,
18532 V8QI_type_node, V8QI_type_node, NULL_TREE);
18533 tree v4hi_ftype_v4hi_v4hi
18534 = build_function_type_list (V4HI_type_node,
18535 V4HI_type_node, V4HI_type_node, NULL_TREE);
18536 tree v2si_ftype_v2si_v2si
18537 = build_function_type_list (V2SI_type_node,
18538 V2SI_type_node, V2SI_type_node, NULL_TREE);
18539 tree di_ftype_di_di
18540 = build_function_type_list (long_long_unsigned_type_node,
18541 long_long_unsigned_type_node,
18542 long_long_unsigned_type_node, NULL_TREE);
18543
18544 tree di_ftype_di_di_int
18545 = build_function_type_list (long_long_unsigned_type_node,
18546 long_long_unsigned_type_node,
18547 long_long_unsigned_type_node,
18548 integer_type_node, NULL_TREE);
18549
18550 tree v2si_ftype_v2sf
18551 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18552 tree v2sf_ftype_v2si
18553 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18554 tree v2si_ftype_v2si
18555 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18556 tree v2sf_ftype_v2sf
18557 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18558 tree v2sf_ftype_v2sf_v2sf
18559 = build_function_type_list (V2SF_type_node,
18560 V2SF_type_node, V2SF_type_node, NULL_TREE);
18561 tree v2si_ftype_v2sf_v2sf
18562 = build_function_type_list (V2SI_type_node,
18563 V2SF_type_node, V2SF_type_node, NULL_TREE);
18564 tree pint_type_node = build_pointer_type (integer_type_node);
18565 tree pdouble_type_node = build_pointer_type (double_type_node);
18566 tree pcdouble_type_node = build_pointer_type (
18567 build_type_variant (double_type_node, 1, 0));
18568 tree int_ftype_v2df_v2df
18569 = build_function_type_list (integer_type_node,
18570 V2DF_type_node, V2DF_type_node, NULL_TREE);
18571
18572 tree void_ftype_pcvoid
18573 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18574 tree v4sf_ftype_v4si
18575 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18576 tree v4si_ftype_v4sf
18577 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18578 tree v2df_ftype_v4si
18579 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18580 tree v4si_ftype_v2df
18581 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18582 tree v4si_ftype_v2df_v2df
18583 = build_function_type_list (V4SI_type_node,
18584 V2DF_type_node, V2DF_type_node, NULL_TREE);
18585 tree v2si_ftype_v2df
18586 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18587 tree v4sf_ftype_v2df
18588 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18589 tree v2df_ftype_v2si
18590 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18591 tree v2df_ftype_v4sf
18592 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18593 tree int_ftype_v2df
18594 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18595 tree int64_ftype_v2df
18596 = build_function_type_list (long_long_integer_type_node,
18597 V2DF_type_node, NULL_TREE);
18598 tree v2df_ftype_v2df_int
18599 = build_function_type_list (V2DF_type_node,
18600 V2DF_type_node, integer_type_node, NULL_TREE);
18601 tree v2df_ftype_v2df_int64
18602 = build_function_type_list (V2DF_type_node,
18603 V2DF_type_node, long_long_integer_type_node,
18604 NULL_TREE);
18605 tree v4sf_ftype_v4sf_v2df
18606 = build_function_type_list (V4SF_type_node,
18607 V4SF_type_node, V2DF_type_node, NULL_TREE);
18608 tree v2df_ftype_v2df_v4sf
18609 = build_function_type_list (V2DF_type_node,
18610 V2DF_type_node, V4SF_type_node, NULL_TREE);
18611 tree v2df_ftype_v2df_v2df_int
18612 = build_function_type_list (V2DF_type_node,
18613 V2DF_type_node, V2DF_type_node,
18614 integer_type_node,
18615 NULL_TREE);
18616 tree v2df_ftype_v2df_pcdouble
18617 = build_function_type_list (V2DF_type_node,
18618 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18619 tree void_ftype_pdouble_v2df
18620 = build_function_type_list (void_type_node,
18621 pdouble_type_node, V2DF_type_node, NULL_TREE);
18622 tree void_ftype_pint_int
18623 = build_function_type_list (void_type_node,
18624 pint_type_node, integer_type_node, NULL_TREE);
18625 tree void_ftype_v16qi_v16qi_pchar
18626 = build_function_type_list (void_type_node,
18627 V16QI_type_node, V16QI_type_node,
18628 pchar_type_node, NULL_TREE);
18629 tree v2df_ftype_pcdouble
18630 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18631 tree v2df_ftype_v2df_v2df
18632 = build_function_type_list (V2DF_type_node,
18633 V2DF_type_node, V2DF_type_node, NULL_TREE);
18634 tree v16qi_ftype_v16qi_v16qi
18635 = build_function_type_list (V16QI_type_node,
18636 V16QI_type_node, V16QI_type_node, NULL_TREE);
18637 tree v8hi_ftype_v8hi_v8hi
18638 = build_function_type_list (V8HI_type_node,
18639 V8HI_type_node, V8HI_type_node, NULL_TREE);
18640 tree v4si_ftype_v4si_v4si
18641 = build_function_type_list (V4SI_type_node,
18642 V4SI_type_node, V4SI_type_node, NULL_TREE);
18643 tree v2di_ftype_v2di_v2di
18644 = build_function_type_list (V2DI_type_node,
18645 V2DI_type_node, V2DI_type_node, NULL_TREE);
18646 tree v2di_ftype_v2df_v2df
18647 = build_function_type_list (V2DI_type_node,
18648 V2DF_type_node, V2DF_type_node, NULL_TREE);
18649 tree v2df_ftype_v2df
18650 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18651 tree v2di_ftype_v2di_int
18652 = build_function_type_list (V2DI_type_node,
18653 V2DI_type_node, integer_type_node, NULL_TREE);
18654 tree v2di_ftype_v2di_v2di_int
18655 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18656 V2DI_type_node, integer_type_node, NULL_TREE);
18657 tree v4si_ftype_v4si_int
18658 = build_function_type_list (V4SI_type_node,
18659 V4SI_type_node, integer_type_node, NULL_TREE);
18660 tree v8hi_ftype_v8hi_int
18661 = build_function_type_list (V8HI_type_node,
18662 V8HI_type_node, integer_type_node, NULL_TREE);
18663 tree v4si_ftype_v8hi_v8hi
18664 = build_function_type_list (V4SI_type_node,
18665 V8HI_type_node, V8HI_type_node, NULL_TREE);
18666 tree di_ftype_v8qi_v8qi
18667 = build_function_type_list (long_long_unsigned_type_node,
18668 V8QI_type_node, V8QI_type_node, NULL_TREE);
18669 tree di_ftype_v2si_v2si
18670 = build_function_type_list (long_long_unsigned_type_node,
18671 V2SI_type_node, V2SI_type_node, NULL_TREE);
18672 tree v2di_ftype_v16qi_v16qi
18673 = build_function_type_list (V2DI_type_node,
18674 V16QI_type_node, V16QI_type_node, NULL_TREE);
18675 tree v2di_ftype_v4si_v4si
18676 = build_function_type_list (V2DI_type_node,
18677 V4SI_type_node, V4SI_type_node, NULL_TREE);
18678 tree int_ftype_v16qi
18679 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18680 tree v16qi_ftype_pcchar
18681 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18682 tree void_ftype_pchar_v16qi
18683 = build_function_type_list (void_type_node,
18684 pchar_type_node, V16QI_type_node, NULL_TREE);
18685
18686 tree v2di_ftype_v2di_unsigned_unsigned
18687 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18688 unsigned_type_node, unsigned_type_node,
18689 NULL_TREE);
18690 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18691 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18692 unsigned_type_node, unsigned_type_node,
18693 NULL_TREE);
18694 tree v2di_ftype_v2di_v16qi
18695 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18696 NULL_TREE);
18697 tree v2df_ftype_v2df_v2df_v2df
18698 = build_function_type_list (V2DF_type_node,
18699 V2DF_type_node, V2DF_type_node,
18700 V2DF_type_node, NULL_TREE);
18701 tree v4sf_ftype_v4sf_v4sf_v4sf
18702 = build_function_type_list (V4SF_type_node,
18703 V4SF_type_node, V4SF_type_node,
18704 V4SF_type_node, NULL_TREE);
18705 tree v8hi_ftype_v16qi
18706 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18707 NULL_TREE);
18708 tree v4si_ftype_v16qi
18709 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18710 NULL_TREE);
18711 tree v2di_ftype_v16qi
18712 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18713 NULL_TREE);
18714 tree v4si_ftype_v8hi
18715 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18716 NULL_TREE);
18717 tree v2di_ftype_v8hi
18718 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18719 NULL_TREE);
18720 tree v2di_ftype_v4si
18721 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18722 NULL_TREE);
18723 tree v2di_ftype_pv2di
18724 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18725 NULL_TREE);
18726 tree v16qi_ftype_v16qi_v16qi_int
18727 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18728 V16QI_type_node, integer_type_node,
18729 NULL_TREE);
18730 tree v16qi_ftype_v16qi_v16qi_v16qi
18731 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18732 V16QI_type_node, V16QI_type_node,
18733 NULL_TREE);
18734 tree v8hi_ftype_v8hi_v8hi_int
18735 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18736 V8HI_type_node, integer_type_node,
18737 NULL_TREE);
18738 tree v4si_ftype_v4si_v4si_int
18739 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18740 V4SI_type_node, integer_type_node,
18741 NULL_TREE);
18742 tree int_ftype_v2di_v2di
18743 = build_function_type_list (integer_type_node,
18744 V2DI_type_node, V2DI_type_node,
18745 NULL_TREE);
18746 tree int_ftype_v16qi_int_v16qi_int_int
18747 = build_function_type_list (integer_type_node,
18748 V16QI_type_node,
18749 integer_type_node,
18750 V16QI_type_node,
18751 integer_type_node,
18752 integer_type_node,
18753 NULL_TREE);
18754 tree v16qi_ftype_v16qi_int_v16qi_int_int
18755 = build_function_type_list (V16QI_type_node,
18756 V16QI_type_node,
18757 integer_type_node,
18758 V16QI_type_node,
18759 integer_type_node,
18760 integer_type_node,
18761 NULL_TREE);
18762 tree int_ftype_v16qi_v16qi_int
18763 = build_function_type_list (integer_type_node,
18764 V16QI_type_node,
18765 V16QI_type_node,
18766 integer_type_node,
18767 NULL_TREE);
18768
18769 /* SSE5 instructions. */
18770 tree v2di_ftype_v2di_v2di_v2di
18771 = build_function_type_list (V2DI_type_node,
18772 V2DI_type_node,
18773 V2DI_type_node,
18774 V2DI_type_node,
18775 NULL_TREE);
18776
18777 tree v4si_ftype_v4si_v4si_v4si
18778 = build_function_type_list (V4SI_type_node,
18779 V4SI_type_node,
18780 V4SI_type_node,
18781 V4SI_type_node,
18782 NULL_TREE);
18783
18784 tree v4si_ftype_v4si_v4si_v2di
18785 = build_function_type_list (V4SI_type_node,
18786 V4SI_type_node,
18787 V4SI_type_node,
18788 V2DI_type_node,
18789 NULL_TREE);
18790
18791 tree v8hi_ftype_v8hi_v8hi_v8hi
18792 = build_function_type_list (V8HI_type_node,
18793 V8HI_type_node,
18794 V8HI_type_node,
18795 V8HI_type_node,
18796 NULL_TREE);
18797
18798 tree v8hi_ftype_v8hi_v8hi_v4si
18799 = build_function_type_list (V8HI_type_node,
18800 V8HI_type_node,
18801 V8HI_type_node,
18802 V4SI_type_node,
18803 NULL_TREE);
18804
18805 tree v2df_ftype_v2df_v2df_v16qi
18806 = build_function_type_list (V2DF_type_node,
18807 V2DF_type_node,
18808 V2DF_type_node,
18809 V16QI_type_node,
18810 NULL_TREE);
18811
18812 tree v4sf_ftype_v4sf_v4sf_v16qi
18813 = build_function_type_list (V4SF_type_node,
18814 V4SF_type_node,
18815 V4SF_type_node,
18816 V16QI_type_node,
18817 NULL_TREE);
18818
18819 tree v2di_ftype_v2di_si
18820 = build_function_type_list (V2DI_type_node,
18821 V2DI_type_node,
18822 integer_type_node,
18823 NULL_TREE);
18824
18825 tree v4si_ftype_v4si_si
18826 = build_function_type_list (V4SI_type_node,
18827 V4SI_type_node,
18828 integer_type_node,
18829 NULL_TREE);
18830
18831 tree v8hi_ftype_v8hi_si
18832 = build_function_type_list (V8HI_type_node,
18833 V8HI_type_node,
18834 integer_type_node,
18835 NULL_TREE);
18836
18837 tree v16qi_ftype_v16qi_si
18838 = build_function_type_list (V16QI_type_node,
18839 V16QI_type_node,
18840 integer_type_node,
18841 NULL_TREE);
18842 tree v4sf_ftype_v4hi
18843 = build_function_type_list (V4SF_type_node,
18844 V4HI_type_node,
18845 NULL_TREE);
18846
18847 tree v4hi_ftype_v4sf
18848 = build_function_type_list (V4HI_type_node,
18849 V4SF_type_node,
18850 NULL_TREE);
18851
18852 tree v2di_ftype_v2di
18853 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18854
18855 tree ftype;
18856
18857 /* The __float80 type. */
18858 if (TYPE_MODE (long_double_type_node) == XFmode)
18859 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18860 "__float80");
18861 else
18862 {
18863 /* The __float80 type. */
18864 tree float80_type_node = make_node (REAL_TYPE);
18865
18866 TYPE_PRECISION (float80_type_node) = 80;
18867 layout_type (float80_type_node);
18868 (*lang_hooks.types.register_builtin_type) (float80_type_node,
18869 "__float80");
18870 }
18871
18872 if (TARGET_64BIT)
18873 {
18874 tree float128_type_node = make_node (REAL_TYPE);
18875
18876 TYPE_PRECISION (float128_type_node) = 128;
18877 layout_type (float128_type_node);
18878 (*lang_hooks.types.register_builtin_type) (float128_type_node,
18879 "__float128");
18880
18881 /* TFmode support builtins. */
18882 ftype = build_function_type (float128_type_node,
18883 void_list_node);
18884 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
18885
18886 ftype = build_function_type_list (float128_type_node,
18887 float128_type_node,
18888 NULL_TREE);
18889 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
18890
18891 ftype = build_function_type_list (float128_type_node,
18892 float128_type_node,
18893 float128_type_node,
18894 NULL_TREE);
18895 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
18896 }
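
  /* For illustration only (hypothetical user code, not exercised by the
     compiler itself): with the registrations above, 64-bit code can use
     the extended types and TFmode builtins directly, e.g.

	__float128 x = __builtin_infq ();
	__float128 y = __builtin_copysignq (__builtin_fabsq (x),
					    (__float128) -1.0);

     The signatures follow the function types built just above.  */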
18897
18898 /* Add all SSE builtins that are more or less simple operations on
18899 three operands. */
18900 for (i = 0, d = bdesc_sse_3arg;
18901 i < ARRAY_SIZE (bdesc_sse_3arg);
18902 i++, d++)
18903 {
18904 /* Use one of the operands; the target can have a different mode for
18905 mask-generating compares. */
18906 enum machine_mode mode;
18907 tree type;
18908
18909 if (d->name == 0)
18910 continue;
18911 mode = insn_data[d->icode].operand[1].mode;
18912
18913 switch (mode)
18914 {
18915 case V16QImode:
18916 type = v16qi_ftype_v16qi_v16qi_int;
18917 break;
18918 case V8HImode:
18919 type = v8hi_ftype_v8hi_v8hi_int;
18920 break;
18921 case V4SImode:
18922 type = v4si_ftype_v4si_v4si_int;
18923 break;
18924 case V2DImode:
18925 type = v2di_ftype_v2di_v2di_int;
18926 break;
18927 case V2DFmode:
18928 type = v2df_ftype_v2df_v2df_int;
18929 break;
18930 case V4SFmode:
18931 type = v4sf_ftype_v4sf_v4sf_int;
18932 break;
18933 default:
18934 gcc_unreachable ();
18935 }
18936
18937 /* Override for variable blends. */
18938 switch (d->icode)
18939 {
18940 case CODE_FOR_sse4_1_blendvpd:
18941 type = v2df_ftype_v2df_v2df_v2df;
18942 break;
18943 case CODE_FOR_sse4_1_blendvps:
18944 type = v4sf_ftype_v4sf_v4sf_v4sf;
18945 break;
18946 case CODE_FOR_sse4_1_pblendvb:
18947 type = v16qi_ftype_v16qi_v16qi_v16qi;
18948 break;
18949 default:
18950 break;
18951 }
18952
18953 def_builtin_const (d->mask, d->name, type, d->code);
18954 }
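
  /* Summary of the loop above: each bdesc_sse_3arg entry becomes a builtin
     whose third operand is an 8-bit immediate, with the function type
     chosen purely from the mode of operand 1 (e.g. a V2DFmode entry gets
     v2df_ftype_v2df_v2df_int).  The variable blends are the exception:
     their selector is a full vector (implicitly xmm0), hence the
     three-vector overrides.  */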
18955
18956 /* Add all builtins that are more or less simple operations on two
18957 operands. */
18958 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18959 {
18960 /* Use one of the operands; the target can have a different mode for
18961 mask-generating compares. */
18962 enum machine_mode mode;
18963 tree type;
18964
18965 if (d->name == 0)
18966 continue;
18967 mode = insn_data[d->icode].operand[1].mode;
18968
18969 switch (mode)
18970 {
18971 case V16QImode:
18972 type = v16qi_ftype_v16qi_v16qi;
18973 break;
18974 case V8HImode:
18975 type = v8hi_ftype_v8hi_v8hi;
18976 break;
18977 case V4SImode:
18978 type = v4si_ftype_v4si_v4si;
18979 break;
18980 case V2DImode:
18981 type = v2di_ftype_v2di_v2di;
18982 break;
18983 case V2DFmode:
18984 type = v2df_ftype_v2df_v2df;
18985 break;
18986 case V4SFmode:
18987 type = v4sf_ftype_v4sf_v4sf;
18988 break;
18989 case V8QImode:
18990 type = v8qi_ftype_v8qi_v8qi;
18991 break;
18992 case V4HImode:
18993 type = v4hi_ftype_v4hi_v4hi;
18994 break;
18995 case V2SImode:
18996 type = v2si_ftype_v2si_v2si;
18997 break;
18998 case DImode:
18999 type = di_ftype_di_di;
19000 break;
19001
19002 default:
19003 gcc_unreachable ();
19004 }
19005
19006 /* Override for comparisons. */
19007 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19008 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19009 type = v4si_ftype_v4sf_v4sf;
19010
19011 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19012 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19013 type = v2di_ftype_v2df_v2df;
19014
19015 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19016 type = v4si_ftype_v2df_v2df;
19017
19018 def_builtin_const (d->mask, d->name, type, d->code);
19019 }
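
  /* The comparison overrides above reflect that the packed compares
     consume float vectors but produce an element-wise all-ones/all-zeros
     mask, so a builtin such as __builtin_ia32_cmpeqps (name given for
     illustration) is typed as returning __v4si from two __v4sf operands
     rather than another float vector.  */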
19020
19021 /* Add all builtins that are more or less simple operations on one operand. */
19022 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19023 {
19024 enum machine_mode mode;
19025 tree type;
19026
19027 if (d->name == 0)
19028 continue;
19029 mode = insn_data[d->icode].operand[1].mode;
19030
19031 switch (mode)
19032 {
19033 case V16QImode:
19034 type = v16qi_ftype_v16qi;
19035 break;
19036 case V8HImode:
19037 type = v8hi_ftype_v8hi;
19038 break;
19039 case V4SImode:
19040 type = v4si_ftype_v4si;
19041 break;
19042 case V2DFmode:
19043 type = v2df_ftype_v2df;
19044 break;
19045 case V4SFmode:
19046 type = v4sf_ftype_v4sf;
19047 break;
19048 case V8QImode:
19049 type = v8qi_ftype_v8qi;
19050 break;
19051 case V4HImode:
19052 type = v4hi_ftype_v4hi;
19053 break;
19054 case V2SImode:
19055 type = v2si_ftype_v2si;
19056 break;
19057
19058 default:
19059 gcc_unreachable ();
19060 }
19061
19062 def_builtin_const (d->mask, d->name, type, d->code);
19063 }
19064
19065 /* pcmpestr[im] insns. */
19066 for (i = 0, d = bdesc_pcmpestr;
19067 i < ARRAY_SIZE (bdesc_pcmpestr);
19068 i++, d++)
19069 {
19070 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19071 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19072 else
19073 ftype = int_ftype_v16qi_int_v16qi_int_int;
19074 def_builtin_const (d->mask, d->name, ftype, d->code);
19075 }
19076
19077 /* pcmpistr[im] insns. */
19078 for (i = 0, d = bdesc_pcmpistr;
19079 i < ARRAY_SIZE (bdesc_pcmpistr);
19080 i++, d++)
19081 {
19082 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19083 ftype = v16qi_ftype_v16qi_v16qi_int;
19084 else
19085 ftype = int_ftype_v16qi_v16qi_int;
19086 def_builtin_const (d->mask, d->name, ftype, d->code);
19087 }
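
  /* The two string-compare families differ only in what they return: the
     pcmp[ei]stri forms yield an integer index while the pcmp[ei]strm forms
     yield a mask in an XMM register, hence int_ftype_* vs. v16qi_ftype_*
     above.  A usage sketch (hypothetical operands, last argument is the
     8-bit mode immediate):

	int idx = __builtin_ia32_pcmpistri128 (a, b, 0x0c);
  */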
19088
19089 /* Add the remaining MMX insns with somewhat more complicated types. */
19090 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19091 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19092 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19093 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19094
19095 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19096 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19097 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19098
19099 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19100 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19101
19102 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19103 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19104
19105 /* comi/ucomi insns. */
19106 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19107 if (d->mask == OPTION_MASK_ISA_SSE2)
19108 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19109 else
19110 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19111
19112 /* ptest insns. */
19113 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19114 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19115
19116 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19117 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19118 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19119
19120 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19121 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19122 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19123 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19124 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19125 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19126 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19127 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19128 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19129 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19130 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19131
19132 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19133
19134 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19135 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19136
19137 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19138 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19139 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19140 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19141
19142 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19143 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19144 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19145 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19146
19147 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19148
19149 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19150
19151 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19152 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19153 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19154 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19155 ftype = build_function_type_list (float_type_node,
19156 float_type_node,
19157 NULL_TREE);
19158 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19159 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19160 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19161
19162 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19163
19164 /* Original 3DNow! */
19165 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19166 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19167 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19168 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19169 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19170 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19171 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19172 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19173 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19174 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19175 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19176 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19177 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19178 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19179 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19180 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19181 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19182 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19183 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19184 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19185
19186 /* 3DNow! extension as used in the Athlon CPU. */
19187 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19188 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19189 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19190 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19191 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19192 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19193
19194 /* SSE2. */
19195 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19196
19197 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19198 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19199
19200 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19201 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19202
19203 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19204 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19205 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19206 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19207 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19208
19209 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19210 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19211 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19213
19214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19215 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19216
19217 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19218
19219 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19221
19222 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19223 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19224 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19225 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19227
19228 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19229
19230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19231 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19232 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19233 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19234
19235 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19236 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19237 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19238
19239 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19240 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19241 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19242 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19243
19244 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19245 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19246 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19247
19248 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19249 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19250
19251 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19252 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19253
19254 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19255 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19256 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19257 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19258 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19259 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19260 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19261
19262 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19263 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19264 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19265 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19266 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19267 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19268 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19269
19270 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19271 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19272 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19273 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19274
19275 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19276
19277 /* Prescott New Instructions. */
19278 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19279 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19280 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
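
  /* Usage sketch for the MONITOR/MWAIT builtins just defined (hypothetical
     variable names; signatures follow void_ftype_pcvoid_unsigned_unsigned
     and void_ftype_unsigned_unsigned built earlier):

	__builtin_ia32_monitor (addr, 0, 0);
	__builtin_ia32_mwait (0, 0);
  */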
19281
19282 /* SSSE3. */
19283 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19284 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19285
19286 /* SSE4.1. */
19287 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19288 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19289 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19290 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19291 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19292 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19293 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19294 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19295 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19296 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19297 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19298 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19299 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19300 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19301
19302 /* SSE4.1 and SSE5. */
19303 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19304 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19305 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19306 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19307
19308 /* SSE4.2. */
19309 ftype = build_function_type_list (unsigned_type_node,
19310 unsigned_type_node,
19311 unsigned_char_type_node,
19312 NULL_TREE);
19313 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19314 ftype = build_function_type_list (unsigned_type_node,
19315 unsigned_type_node,
19316 short_unsigned_type_node,
19317 NULL_TREE);
19318 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19319 ftype = build_function_type_list (unsigned_type_node,
19320 unsigned_type_node,
19321 unsigned_type_node,
19322 NULL_TREE);
19323 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19324 ftype = build_function_type_list (long_long_unsigned_type_node,
19325 long_long_unsigned_type_node,
19326 long_long_unsigned_type_node,
19327 NULL_TREE);
19328 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
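
  /* Usage sketch for the CRC32 builtins above (hypothetical variables);
     each call folds its second operand into the running CRC and returns
     the updated value:

	unsigned int crc = 0;
	crc = __builtin_ia32_crc32qi (crc, byte);
	crc = __builtin_ia32_crc32si (crc, word);

     The SSE4.2 intrinsic wrappers (_mm_crc32_u8 and friends) are assumed
     to expand to these builtins.  */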
19329
19330 /* AMDFAM10 SSE4A new built-ins. */
19331 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19332 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19333 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19334 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19335 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19336 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19337
19338 /* Access to the vec_init patterns. */
19339 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19340 integer_type_node, NULL_TREE);
19341 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19342
19343 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19344 short_integer_type_node,
19345 short_integer_type_node,
19346 short_integer_type_node, NULL_TREE);
19347 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19348
19349 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19350 char_type_node, char_type_node,
19351 char_type_node, char_type_node,
19352 char_type_node, char_type_node,
19353 char_type_node, NULL_TREE);
19354 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
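
  /* The vec_init builtins assemble an MMX vector from scalar elements,
     e.g. (hypothetical variables; lane order is that of the vec_init
     expanders):

	__v2si v = __builtin_ia32_vec_init_v2si (lo, hi);

     The _mm_set_* style intrinsics are assumed to sit on top of these.  */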
19355
19356 /* Access to the vec_extract patterns. */
19357 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19358 integer_type_node, NULL_TREE);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19360
19361 ftype = build_function_type_list (long_long_integer_type_node,
19362 V2DI_type_node, integer_type_node,
19363 NULL_TREE);
19364 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19365
19366 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19367 integer_type_node, NULL_TREE);
19368 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19369
19370 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19371 integer_type_node, NULL_TREE);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19373
19374 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19375 integer_type_node, NULL_TREE);
19376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19377
19378 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19379 integer_type_node, NULL_TREE);
19380 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19381
19382 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19383 integer_type_node, NULL_TREE);
19384 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19385
19386 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19387 integer_type_node, NULL_TREE);
19388 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
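
  /* The vec_ext builtins are the read-side counterpart: for example,
     __builtin_ia32_vec_ext_v4sf (v, 0) yields element 0 of a __v4sf value
     as a float (v is a hypothetical variable).  */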
19389
19390 /* Access to the vec_set patterns. */
19391 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19392 intDI_type_node,
19393 integer_type_node, NULL_TREE);
19394 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19395
19396 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19397 float_type_node,
19398 integer_type_node, NULL_TREE);
19399 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19400
19401 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19402 intSI_type_node,
19403 integer_type_node, NULL_TREE);
19404 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19405
19406 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19407 intHI_type_node,
19408 integer_type_node, NULL_TREE);
19409 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19410
19411 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19412 intHI_type_node,
19413 integer_type_node, NULL_TREE);
19414 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19415
19416 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19417 intQI_type_node,
19418 integer_type_node, NULL_TREE);
19419 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
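
  /* Each vec_set builtin returns a copy of its first argument with the
     lane selected by the final index replaced, e.g. (hypothetical
     variables):

	v = __builtin_ia32_vec_set_v8hi (v, x, 3);

     Intrinsics such as _mm_insert_epi16 are assumed to expand this way.  */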
19420
19421 /* Add the SSE5 multi-arg instructions. */
19422 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19423 {
19424 tree mtype = NULL_TREE;
19425
19426 if (d->name == 0)
19427 continue;
19428
19429 switch ((enum multi_arg_type)d->flag)
19430 {
19431 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19432 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19433 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19434 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19435 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19436 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19437 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19438 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19439 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19440 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19441 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19442 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19443 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19444 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19445 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19446 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19447 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19448 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19449 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19450 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19451 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19452 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19453 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19454 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19455 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19456 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19457 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19458 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19459 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19460 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19461 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19462 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19463 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19464 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19465 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19466 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19467 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19468 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19469 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19470 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19471 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19472 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19473 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19474 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19475 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19476 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19477 case MULTI_ARG_UNKNOWN:
19478 default:
19479 gcc_unreachable ();
19480 }
19481
19482 if (mtype)
19483 def_builtin_const (d->mask, d->name, mtype, d->code);
19484 }
19485 }
19486
19487 static void
19488 ix86_init_builtins (void)
19489 {
19490 if (TARGET_MMX)
19491 ix86_init_mmx_sse_builtins ();
19492 }
19493
19494 /* Errors in the source file can cause expand_expr to return const0_rtx
19495 where we expect a vector. To avoid crashing, use one of the vector
19496 clear instructions. */
19497 static rtx
19498 safe_vector_operand (rtx x, enum machine_mode mode)
19499 {
19500 if (x == const0_rtx)
19501 x = CONST0_RTX (mode);
19502 return x;
19503 }
19504
19505 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19506 4 operands.  The third argument must be an 8-bit immediate (a 4-bit
19507 immediate for rounds[sd]), or the xmm0 register for the variable blends.  */
19508
19509 static rtx
19510 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19511 rtx target)
19512 {
19513 rtx pat;
19514 tree arg0 = CALL_EXPR_ARG (exp, 0);
19515 tree arg1 = CALL_EXPR_ARG (exp, 1);
19516 tree arg2 = CALL_EXPR_ARG (exp, 2);
19517 rtx op0 = expand_normal (arg0);
19518 rtx op1 = expand_normal (arg1);
19519 rtx op2 = expand_normal (arg2);
19520 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19521 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19522 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19523 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19524
19525 if (VECTOR_MODE_P (mode1))
19526 op0 = safe_vector_operand (op0, mode1);
19527 if (VECTOR_MODE_P (mode2))
19528 op1 = safe_vector_operand (op1, mode2);
19529 if (VECTOR_MODE_P (mode3))
19530 op2 = safe_vector_operand (op2, mode3);
19531
19532 if (optimize
19533 || target == 0
19534 || GET_MODE (target) != tmode
19535 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19536 target = gen_reg_rtx (tmode);
19537
19538 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19539 op0 = copy_to_mode_reg (mode1, op0);
19540 if ((optimize && !register_operand (op1, mode2))
19541 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19542 op1 = copy_to_mode_reg (mode2, op1);
19543
19544 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19545 switch (icode)
19546 {
19547 case CODE_FOR_sse4_1_blendvpd:
19548 case CODE_FOR_sse4_1_blendvps:
19549 case CODE_FOR_sse4_1_pblendvb:
19550 op2 = copy_to_mode_reg (mode3, op2);
19551 break;
19552
19553 case CODE_FOR_sse4_1_roundsd:
19554 case CODE_FOR_sse4_1_roundss:
19555 error ("the third argument must be a 4-bit immediate");
19556 return const0_rtx;
19557
19558 default:
19559 error ("the third argument must be an 8-bit immediate");
19560 return const0_rtx;
19561 }
19562
19563 pat = GEN_FCN (icode) (target, op0, op1, op2);
19564 if (! pat)
19565 return 0;
19566 emit_insn (pat);
19567 return target;
19568 }
19569
19570 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19571
19572 static rtx
19573 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19574 {
19575 rtx pat;
19576 tree arg0 = CALL_EXPR_ARG (exp, 0);
19577 tree arg1 = CALL_EXPR_ARG (exp, 1);
19578 rtx op0 = expand_normal (arg0);
19579 rtx op1 = expand_normal (arg1);
19580 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19581 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19582 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19583
19584 if (optimize
19585 || !target
19586 || GET_MODE (target) != tmode
19587 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19588 target = gen_reg_rtx (tmode);
19589
19590 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19591 op0 = copy_to_mode_reg (mode0, op0);
19592 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19593 {
19594 op1 = copy_to_reg (op1);
19595 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19596 }
19597
19598 pat = GEN_FCN (icode) (target, op0, op1);
19599 if (! pat)
19600 return 0;
19601 emit_insn (pat);
19602 return target;
19603 }
19604
19605 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19606
19607 static rtx
19608 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19609 {
19610 rtx pat, xops[3];
19611 tree arg0 = CALL_EXPR_ARG (exp, 0);
19612 tree arg1 = CALL_EXPR_ARG (exp, 1);
19613 rtx op0 = expand_normal (arg0);
19614 rtx op1 = expand_normal (arg1);
19615 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19616 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19617 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19618
19619 if (VECTOR_MODE_P (mode0))
19620 op0 = safe_vector_operand (op0, mode0);
19621 if (VECTOR_MODE_P (mode1))
19622 op1 = safe_vector_operand (op1, mode1);
19623
19624 if (optimize || !target
19625 || GET_MODE (target) != tmode
19626 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19627 target = gen_reg_rtx (tmode);
19628
19629 if (GET_MODE (op1) == SImode && mode1 == TImode)
19630 {
19631 rtx x = gen_reg_rtx (V4SImode);
19632 emit_insn (gen_sse2_loadd (x, op1));
19633 op1 = gen_lowpart (TImode, x);
19634 }
19635
19636 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19637 op0 = copy_to_mode_reg (mode0, op0);
19638 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19639 op1 = copy_to_mode_reg (mode1, op1);
19640
19641 /* ??? Using ix86_fixup_binary_operands is problematic when
19642 we've got mismatched modes. Fake it. */
19643
19644 xops[0] = target;
19645 xops[1] = op0;
19646 xops[2] = op1;
19647
19648 if (tmode == mode0 && tmode == mode1)
19649 {
19650 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19651 op0 = xops[1];
19652 op1 = xops[2];
19653 }
19654 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19655 {
19656 op0 = force_reg (mode0, op0);
19657 op1 = force_reg (mode1, op1);
19658 target = gen_reg_rtx (tmode);
19659 }
19660
19661 pat = GEN_FCN (icode) (target, op0, op1);
19662 if (! pat)
19663 return 0;
19664 emit_insn (pat);
19665 return target;
19666 }
19667
19668 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19669
19670 static rtx
19671 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19672 enum multi_arg_type m_type,
19673 enum insn_code sub_code)
19674 {
19675 rtx pat;
19676 int i;
19677 int nargs;
19678 bool comparison_p = false;
19679 bool tf_p = false;
19680 bool last_arg_constant = false;
19681 int num_memory = 0;
19682 struct {
19683 rtx op;
19684 enum machine_mode mode;
19685 } args[4];
19686
19687 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19688
19689 switch (m_type)
19690 {
19691 case MULTI_ARG_3_SF:
19692 case MULTI_ARG_3_DF:
19693 case MULTI_ARG_3_DI:
19694 case MULTI_ARG_3_SI:
19695 case MULTI_ARG_3_SI_DI:
19696 case MULTI_ARG_3_HI:
19697 case MULTI_ARG_3_HI_SI:
19698 case MULTI_ARG_3_QI:
19699 case MULTI_ARG_3_PERMPS:
19700 case MULTI_ARG_3_PERMPD:
19701 nargs = 3;
19702 break;
19703
19704 case MULTI_ARG_2_SF:
19705 case MULTI_ARG_2_DF:
19706 case MULTI_ARG_2_DI:
19707 case MULTI_ARG_2_SI:
19708 case MULTI_ARG_2_HI:
19709 case MULTI_ARG_2_QI:
19710 nargs = 2;
19711 break;
19712
19713 case MULTI_ARG_2_DI_IMM:
19714 case MULTI_ARG_2_SI_IMM:
19715 case MULTI_ARG_2_HI_IMM:
19716 case MULTI_ARG_2_QI_IMM:
19717 nargs = 2;
19718 last_arg_constant = true;
19719 break;
19720
19721 case MULTI_ARG_1_SF:
19722 case MULTI_ARG_1_DF:
19723 case MULTI_ARG_1_DI:
19724 case MULTI_ARG_1_SI:
19725 case MULTI_ARG_1_HI:
19726 case MULTI_ARG_1_QI:
19727 case MULTI_ARG_1_SI_DI:
19728 case MULTI_ARG_1_HI_DI:
19729 case MULTI_ARG_1_HI_SI:
19730 case MULTI_ARG_1_QI_DI:
19731 case MULTI_ARG_1_QI_SI:
19732 case MULTI_ARG_1_QI_HI:
19733 case MULTI_ARG_1_PH2PS:
19734 case MULTI_ARG_1_PS2PH:
19735 nargs = 1;
19736 break;
19737
19738 case MULTI_ARG_2_SF_CMP:
19739 case MULTI_ARG_2_DF_CMP:
19740 case MULTI_ARG_2_DI_CMP:
19741 case MULTI_ARG_2_SI_CMP:
19742 case MULTI_ARG_2_HI_CMP:
19743 case MULTI_ARG_2_QI_CMP:
19744 nargs = 2;
19745 comparison_p = true;
19746 break;
19747
19748 case MULTI_ARG_2_SF_TF:
19749 case MULTI_ARG_2_DF_TF:
19750 case MULTI_ARG_2_DI_TF:
19751 case MULTI_ARG_2_SI_TF:
19752 case MULTI_ARG_2_HI_TF:
19753 case MULTI_ARG_2_QI_TF:
19754 nargs = 2;
19755 tf_p = true;
19756 break;
19757
19758 case MULTI_ARG_UNKNOWN:
19759 default:
19760 gcc_unreachable ();
19761 }
19762
19763 if (optimize || !target
19764 || GET_MODE (target) != tmode
19765 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19766 target = gen_reg_rtx (tmode);
19767
19768 gcc_assert (nargs <= 4);
19769
19770 for (i = 0; i < nargs; i++)
19771 {
19772 tree arg = CALL_EXPR_ARG (exp, i);
19773 rtx op = expand_normal (arg);
19774 int adjust = (comparison_p) ? 1 : 0;
19775 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19776
19777 if (last_arg_constant && i == nargs-1)
19778 {
19779 if (GET_CODE (op) != CONST_INT)
19780 {
19781 	      error ("the last argument must be an immediate");
19782 return gen_reg_rtx (tmode);
19783 }
19784 }
19785 else
19786 {
19787 if (VECTOR_MODE_P (mode))
19788 op = safe_vector_operand (op, mode);
19789
19790 /* If we aren't optimizing, only allow one memory operand to be
19791 generated. */
19792 if (memory_operand (op, mode))
19793 num_memory++;
19794
19795 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19796
19797 if (optimize
19798 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19799 || num_memory > 1)
19800 op = force_reg (mode, op);
19801 }
19802
19803 args[i].op = op;
19804 args[i].mode = mode;
19805 }
19806
19807 switch (nargs)
19808 {
19809 case 1:
19810 pat = GEN_FCN (icode) (target, args[0].op);
19811 break;
19812
19813 case 2:
19814 if (tf_p)
19815 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19816 GEN_INT ((int)sub_code));
19817 else if (! comparison_p)
19818 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19819 else
19820 {
19821 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19822 args[0].op,
19823 args[1].op);
19824
19825 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19826 }
19827 break;
19828
19829 case 3:
19830 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19831 break;
19832
19833 default:
19834 gcc_unreachable ();
19835 }
19836
19837 if (! pat)
19838 return 0;
19839
19840 emit_insn (pat);
19841 return target;
19842 }
19843
19844 /* Subroutine of ix86_expand_builtin to take care of stores. */
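/* Illustrative example of what reaches this routine (hypothetical user
   code, not part of the compiler):

       #include <xmmintrin.h>
       void store (float *p, __m128 v) { _mm_storeu_ps (p, v); }

   arrives via IX86_BUILTIN_STOREUPS; ARG0 is the pointer, which is wrapped
   in a MEM of MODE0, and ARG1 is the vector value, which is forced into a
   register of MODE1 before the store pattern is emitted.  */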
19845
19846 static rtx
19847 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19848 {
19849 rtx pat;
19850 tree arg0 = CALL_EXPR_ARG (exp, 0);
19851 tree arg1 = CALL_EXPR_ARG (exp, 1);
19852 rtx op0 = expand_normal (arg0);
19853 rtx op1 = expand_normal (arg1);
19854 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19855 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19856
19857 if (VECTOR_MODE_P (mode1))
19858 op1 = safe_vector_operand (op1, mode1);
19859
19860 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19861 op1 = copy_to_mode_reg (mode1, op1);
19862
19863 pat = GEN_FCN (icode) (op0, op1);
19864 if (pat)
19865 emit_insn (pat);
19866 return 0;
19867 }
19868
19869 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
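/* Illustrative example (hypothetical user code): with DO_LOAD nonzero,

       #include <xmmintrin.h>
       __m128 load (const float *p) { return _mm_loadu_ps (p); }

   comes through IX86_BUILTIN_LOADUPS; the pointer operand is wrapped in a
   MEM of MODE0 and the unaligned-move pattern loads it into TARGET.  */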
19870
19871 static rtx
19872 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
19873 rtx target, int do_load)
19874 {
19875 rtx pat;
19876 tree arg0 = CALL_EXPR_ARG (exp, 0);
19877 rtx op0 = expand_normal (arg0);
19878 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19879 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19880
19881 if (optimize || !target
19882 || GET_MODE (target) != tmode
19883 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19884 target = gen_reg_rtx (tmode);
19885 if (do_load)
19886 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19887 else
19888 {
19889 if (VECTOR_MODE_P (mode0))
19890 op0 = safe_vector_operand (op0, mode0);
19891
19892 if ((optimize && !register_operand (op0, mode0))
19893 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19894 op0 = copy_to_mode_reg (mode0, op0);
19895 }
19896
19897 switch (icode)
19898 {
19899 case CODE_FOR_sse4_1_roundpd:
19900 case CODE_FOR_sse4_1_roundps:
19901 {
19902 tree arg1 = CALL_EXPR_ARG (exp, 1);
19903 rtx op1 = expand_normal (arg1);
19904 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19905
19906 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19907 {
19908 error ("the second argument must be a 4-bit immediate");
19909 return const0_rtx;
19910 }
19911 pat = GEN_FCN (icode) (target, op0, op1);
19912 }
19913 break;
19914 default:
19915 pat = GEN_FCN (icode) (target, op0);
19916 break;
19917 }
19918
19919 if (! pat)
19920 return 0;
19921 emit_insn (pat);
19922 return target;
19923 }
19924
19925 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
19926 sqrtss, rsqrtss, rcpss. */
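/* Illustrative example (hypothetical user code):

       #include <xmmintrin.h>
       __m128 root (__m128 v) { return _mm_sqrt_ss (v); }

   arrives as IX86_BUILTIN_SQRTSS.  The single source is used for both
   inputs of the vm* pattern, so only element 0 is computed while the upper
   three elements pass through unchanged.  */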
19927
19928 static rtx
19929 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
19930 {
19931 rtx pat;
19932 tree arg0 = CALL_EXPR_ARG (exp, 0);
19933 rtx op1, op0 = expand_normal (arg0);
19934 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19935 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19936
19937 if (optimize || !target
19938 || GET_MODE (target) != tmode
19939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19940 target = gen_reg_rtx (tmode);
19941
19942 if (VECTOR_MODE_P (mode0))
19943 op0 = safe_vector_operand (op0, mode0);
19944
19945 if ((optimize && !register_operand (op0, mode0))
19946 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19947 op0 = copy_to_mode_reg (mode0, op0);
19948
19949 op1 = op0;
19950 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
19951 op1 = copy_to_mode_reg (mode0, op1);
19952
19953 pat = GEN_FCN (icode) (target, op0, op1);
19954 if (! pat)
19955 return 0;
19956 emit_insn (pat);
19957 return target;
19958 }
19959
19960 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
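/* Illustrative example (hypothetical user code):

       #include <xmmintrin.h>
       __m128 eq (__m128 a, __m128 b) { return _mm_cmpeq_ps (a, b); }

   is dispatched here from the bdesc_2arg table; the comparison code from
   the descriptor is wrapped in OP2 and the maskcmp pattern produces a
   per-element all-ones/all-zeros mask.  Predicates the hardware only
   implements in one direction are handled via BUILTIN_DESC_SWAP_OPERANDS,
   which swaps OP0 and OP1 first.  */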
19961
19962 static rtx
19963 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
19964 rtx target)
19965 {
19966 rtx pat;
19967 tree arg0 = CALL_EXPR_ARG (exp, 0);
19968 tree arg1 = CALL_EXPR_ARG (exp, 1);
19969 rtx op0 = expand_normal (arg0);
19970 rtx op1 = expand_normal (arg1);
19971 rtx op2;
19972 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
19973 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
19974 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
19975 enum rtx_code comparison = d->comparison;
19976
19977 if (VECTOR_MODE_P (mode0))
19978 op0 = safe_vector_operand (op0, mode0);
19979 if (VECTOR_MODE_P (mode1))
19980 op1 = safe_vector_operand (op1, mode1);
19981
19982 /* Swap operands if we have a comparison that isn't available in
19983 hardware. */
19984 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
19985 {
19986 rtx tmp = gen_reg_rtx (mode1);
19987 emit_move_insn (tmp, op1);
19988 op1 = op0;
19989 op0 = tmp;
19990 }
19991
19992 if (optimize || !target
19993 || GET_MODE (target) != tmode
19994 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
19995 target = gen_reg_rtx (tmode);
19996
19997 if ((optimize && !register_operand (op0, mode0))
19998 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
19999 op0 = copy_to_mode_reg (mode0, op0);
20000 if ((optimize && !register_operand (op1, mode1))
20001 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20002 op1 = copy_to_mode_reg (mode1, op1);
20003
20004 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20005 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20006 if (! pat)
20007 return 0;
20008 emit_insn (pat);
20009 return target;
20010 }
20011
20012 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
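/* Illustrative example (hypothetical user code):

       #include <xmmintrin.h>
       int cmp (__m128 a, __m128 b) { return _mm_comieq_ss (a, b); }

   The comis pattern only sets the flags; the code below zeroes an SImode
   pseudo, sets its low QImode part from the flag comparison, and returns
   the SImode register as the boolean result.  */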
20013
20014 static rtx
20015 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20016 rtx target)
20017 {
20018 rtx pat;
20019 tree arg0 = CALL_EXPR_ARG (exp, 0);
20020 tree arg1 = CALL_EXPR_ARG (exp, 1);
20021 rtx op0 = expand_normal (arg0);
20022 rtx op1 = expand_normal (arg1);
20023 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20024 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20025 enum rtx_code comparison = d->comparison;
20026
20027 if (VECTOR_MODE_P (mode0))
20028 op0 = safe_vector_operand (op0, mode0);
20029 if (VECTOR_MODE_P (mode1))
20030 op1 = safe_vector_operand (op1, mode1);
20031
20032 /* Swap operands if we have a comparison that isn't available in
20033 hardware. */
20034 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20035 {
20036 rtx tmp = op1;
20037 op1 = op0;
20038 op0 = tmp;
20039 }
20040
20041 target = gen_reg_rtx (SImode);
20042 emit_move_insn (target, const0_rtx);
20043 target = gen_rtx_SUBREG (QImode, target, 0);
20044
20045 if ((optimize && !register_operand (op0, mode0))
20046 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20047 op0 = copy_to_mode_reg (mode0, op0);
20048 if ((optimize && !register_operand (op1, mode1))
20049 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20050 op1 = copy_to_mode_reg (mode1, op1);
20051
20052 pat = GEN_FCN (d->icode) (op0, op1);
20053 if (! pat)
20054 return 0;
20055 emit_insn (pat);
20056 emit_insn (gen_rtx_SET (VOIDmode,
20057 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20058 gen_rtx_fmt_ee (comparison, QImode,
20059 SET_DEST (pat),
20060 const0_rtx)));
20061
20062 return SUBREG_REG (target);
20063 }
20064
20065 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
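/* Illustrative example (hypothetical user code, SSE4.1):

       #include <smmintrin.h>
       int allzero (__m128i a, __m128i b) { return _mm_testz_si128 (a, b); }

   As in the comi expander above, the ptest pattern sets the flags and the
   boolean result is read back into the low byte of an SImode pseudo using
   the comparison code recorded in the builtin description.  */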
20066
20067 static rtx
20068 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20069 rtx target)
20070 {
20071 rtx pat;
20072 tree arg0 = CALL_EXPR_ARG (exp, 0);
20073 tree arg1 = CALL_EXPR_ARG (exp, 1);
20074 rtx op0 = expand_normal (arg0);
20075 rtx op1 = expand_normal (arg1);
20076 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20077 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20078 enum rtx_code comparison = d->comparison;
20079
20080 if (VECTOR_MODE_P (mode0))
20081 op0 = safe_vector_operand (op0, mode0);
20082 if (VECTOR_MODE_P (mode1))
20083 op1 = safe_vector_operand (op1, mode1);
20084
20085 target = gen_reg_rtx (SImode);
20086 emit_move_insn (target, const0_rtx);
20087 target = gen_rtx_SUBREG (QImode, target, 0);
20088
20089 if ((optimize && !register_operand (op0, mode0))
20090 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20091 op0 = copy_to_mode_reg (mode0, op0);
20092 if ((optimize && !register_operand (op1, mode1))
20093 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20094 op1 = copy_to_mode_reg (mode1, op1);
20095
20096 pat = GEN_FCN (d->icode) (op0, op1);
20097 if (! pat)
20098 return 0;
20099 emit_insn (pat);
20100 emit_insn (gen_rtx_SET (VOIDmode,
20101 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20102 gen_rtx_fmt_ee (comparison, QImode,
20103 SET_DEST (pat),
20104 const0_rtx)));
20105
20106 return SUBREG_REG (target);
20107 }
20108
20109 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
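/* Illustrative example (hypothetical user code, SSE4.2):

       #include <nmmintrin.h>
       int idx (__m128i a, int la, __m128i b, int lb)
       { return _mm_cmpestri (a, la, b, lb, _SIDD_CMP_EQUAL_EACH); }

   The ...ESTRI form returns the index result (TMODE0), ...ESTRM returns
   the mask (TMODE1), and the flag-testing variants read their result back
   from FLAGS_REG in the code at the end of this function.  */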
20110
20111 static rtx
20112 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20113 tree exp, rtx target)
20114 {
20115 rtx pat;
20116 tree arg0 = CALL_EXPR_ARG (exp, 0);
20117 tree arg1 = CALL_EXPR_ARG (exp, 1);
20118 tree arg2 = CALL_EXPR_ARG (exp, 2);
20119 tree arg3 = CALL_EXPR_ARG (exp, 3);
20120 tree arg4 = CALL_EXPR_ARG (exp, 4);
20121 rtx scratch0, scratch1;
20122 rtx op0 = expand_normal (arg0);
20123 rtx op1 = expand_normal (arg1);
20124 rtx op2 = expand_normal (arg2);
20125 rtx op3 = expand_normal (arg3);
20126 rtx op4 = expand_normal (arg4);
20127 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20128
20129 tmode0 = insn_data[d->icode].operand[0].mode;
20130 tmode1 = insn_data[d->icode].operand[1].mode;
20131 modev2 = insn_data[d->icode].operand[2].mode;
20132 modei3 = insn_data[d->icode].operand[3].mode;
20133 modev4 = insn_data[d->icode].operand[4].mode;
20134 modei5 = insn_data[d->icode].operand[5].mode;
20135 modeimm = insn_data[d->icode].operand[6].mode;
20136
20137 if (VECTOR_MODE_P (modev2))
20138 op0 = safe_vector_operand (op0, modev2);
20139 if (VECTOR_MODE_P (modev4))
20140 op2 = safe_vector_operand (op2, modev4);
20141
20142 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20143 op0 = copy_to_mode_reg (modev2, op0);
20144 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20145 op1 = copy_to_mode_reg (modei3, op1);
20146 if ((optimize && !register_operand (op2, modev4))
20147 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20148 op2 = copy_to_mode_reg (modev4, op2);
20149 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20150 op3 = copy_to_mode_reg (modei5, op3);
20151
20152 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20153 {
20154       error ("the fifth argument must be an 8-bit immediate");
20155 return const0_rtx;
20156 }
20157
20158 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20159 {
20160 if (optimize || !target
20161 || GET_MODE (target) != tmode0
20162 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20163 target = gen_reg_rtx (tmode0);
20164
20165 scratch1 = gen_reg_rtx (tmode1);
20166
20167 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20168 }
20169 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20170 {
20171 if (optimize || !target
20172 || GET_MODE (target) != tmode1
20173 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20174 target = gen_reg_rtx (tmode1);
20175
20176 scratch0 = gen_reg_rtx (tmode0);
20177
20178 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20179 }
20180 else
20181 {
20182 gcc_assert (d->flag);
20183
20184 scratch0 = gen_reg_rtx (tmode0);
20185 scratch1 = gen_reg_rtx (tmode1);
20186
20187 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20188 }
20189
20190 if (! pat)
20191 return 0;
20192
20193 emit_insn (pat);
20194
20195 if (d->flag)
20196 {
20197 target = gen_reg_rtx (SImode);
20198 emit_move_insn (target, const0_rtx);
20199 target = gen_rtx_SUBREG (QImode, target, 0);
20200
20201 emit_insn
20202 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20203 gen_rtx_fmt_ee (EQ, QImode,
20204 gen_rtx_REG ((enum machine_mode) d->flag,
20205 FLAGS_REG),
20206 const0_rtx)));
20207 return SUBREG_REG (target);
20208 }
20209 else
20210 return target;
20211 }
20212
20213
20214 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
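/* Analogous to the pcmpestr expander above; an illustrative (hypothetical)
   use is

       #include <nmmintrin.h>
       __m128i mask (__m128i a, __m128i b)
       { return _mm_cmpistrm (a, b, _SIDD_CMP_EQUAL_ANY); }

   The implicit-length forms take no length arguments, so only the two
   vector operands and the immediate are validated here.  */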
20215
20216 static rtx
20217 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20218 tree exp, rtx target)
20219 {
20220 rtx pat;
20221 tree arg0 = CALL_EXPR_ARG (exp, 0);
20222 tree arg1 = CALL_EXPR_ARG (exp, 1);
20223 tree arg2 = CALL_EXPR_ARG (exp, 2);
20224 rtx scratch0, scratch1;
20225 rtx op0 = expand_normal (arg0);
20226 rtx op1 = expand_normal (arg1);
20227 rtx op2 = expand_normal (arg2);
20228 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20229
20230 tmode0 = insn_data[d->icode].operand[0].mode;
20231 tmode1 = insn_data[d->icode].operand[1].mode;
20232 modev2 = insn_data[d->icode].operand[2].mode;
20233 modev3 = insn_data[d->icode].operand[3].mode;
20234 modeimm = insn_data[d->icode].operand[4].mode;
20235
20236 if (VECTOR_MODE_P (modev2))
20237 op0 = safe_vector_operand (op0, modev2);
20238 if (VECTOR_MODE_P (modev3))
20239 op1 = safe_vector_operand (op1, modev3);
20240
20241 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20242 op0 = copy_to_mode_reg (modev2, op0);
20243 if ((optimize && !register_operand (op1, modev3))
20244 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20245 op1 = copy_to_mode_reg (modev3, op1);
20246
20247 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20248 {
20249       error ("the third argument must be an 8-bit immediate");
20250 return const0_rtx;
20251 }
20252
20253 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20254 {
20255 if (optimize || !target
20256 || GET_MODE (target) != tmode0
20257 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20258 target = gen_reg_rtx (tmode0);
20259
20260 scratch1 = gen_reg_rtx (tmode1);
20261
20262 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20263 }
20264 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20265 {
20266 if (optimize || !target
20267 || GET_MODE (target) != tmode1
20268 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20269 target = gen_reg_rtx (tmode1);
20270
20271 scratch0 = gen_reg_rtx (tmode0);
20272
20273 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20274 }
20275 else
20276 {
20277 gcc_assert (d->flag);
20278
20279 scratch0 = gen_reg_rtx (tmode0);
20280 scratch1 = gen_reg_rtx (tmode1);
20281
20282 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20283 }
20284
20285 if (! pat)
20286 return 0;
20287
20288 emit_insn (pat);
20289
20290 if (d->flag)
20291 {
20292 target = gen_reg_rtx (SImode);
20293 emit_move_insn (target, const0_rtx);
20294 target = gen_rtx_SUBREG (QImode, target, 0);
20295
20296 emit_insn
20297 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20298 gen_rtx_fmt_ee (EQ, QImode,
20299 gen_rtx_REG ((enum machine_mode) d->flag,
20300 FLAGS_REG),
20301 const0_rtx)));
20302 return SUBREG_REG (target);
20303 }
20304 else
20305 return target;
20306 }
20307
20308 /* Return the integer constant in ARG. Constrain it to be in the range
20309 of the subparts of VEC_TYPE; issue an error if not. */
20310
20311 static int
20312 get_element_number (tree vec_type, tree arg)
20313 {
20314 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20315
20316 if (!host_integerp (arg, 1)
20317 || (elt = tree_low_cst (arg, 1), elt > max))
20318 {
20319 error ("selector must be an integer constant in the range 0..%wi", max);
20320 return 0;
20321 }
20322
20323 return elt;
20324 }
20325
20326 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20327 ix86_expand_vector_init. We DO have language-level syntax for this, in
20328 the form of (type){ init-list }. Except that since we can't place emms
20329 instructions from inside the compiler, we can't allow the use of MMX
20330 registers unless the user explicitly asks for it. So we do *not* define
20331 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20332    we have builtins invoked by mmintrin.h that give us license to emit
20333 these sorts of instructions. */
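/* Illustrative example (hypothetical user code, compiled with -mmmx):

       #include <mmintrin.h>
       __m64 pair (int a, int b) { return _mm_setr_pi32 (a, b); }

   expands through IX86_BUILTIN_VEC_INIT_V2SI into ix86_expand_vector_init
   below, rather than through a vec_init pattern in mmx.md.  */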
20334
20335 static rtx
20336 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20337 {
20338 enum machine_mode tmode = TYPE_MODE (type);
20339 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20340 int i, n_elt = GET_MODE_NUNITS (tmode);
20341 rtvec v = rtvec_alloc (n_elt);
20342
20343 gcc_assert (VECTOR_MODE_P (tmode));
20344 gcc_assert (call_expr_nargs (exp) == n_elt);
20345
20346 for (i = 0; i < n_elt; ++i)
20347 {
20348 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20349 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20350 }
20351
20352 if (!target || !register_operand (target, tmode))
20353 target = gen_reg_rtx (tmode);
20354
20355 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20356 return target;
20357 }
20358
20359 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20360 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20361 had a language-level syntax for referencing vector elements. */
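/* Illustrative example (hypothetical user code):

       #include <emmintrin.h>
       int third (__m128i v) { return _mm_extract_epi16 (v, 3); }

   arrives as one of the __builtin_ia32_vec_ext_* builtins; the selector is
   validated by get_element_number above and the extraction itself is done
   by ix86_expand_vector_extract.  */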
20362
20363 static rtx
20364 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20365 {
20366 enum machine_mode tmode, mode0;
20367 tree arg0, arg1;
20368 int elt;
20369 rtx op0;
20370
20371 arg0 = CALL_EXPR_ARG (exp, 0);
20372 arg1 = CALL_EXPR_ARG (exp, 1);
20373
20374 op0 = expand_normal (arg0);
20375 elt = get_element_number (TREE_TYPE (arg0), arg1);
20376
20377 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20378 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20379 gcc_assert (VECTOR_MODE_P (mode0));
20380
20381 op0 = force_reg (mode0, op0);
20382
20383 if (optimize || !target || !register_operand (target, tmode))
20384 target = gen_reg_rtx (tmode);
20385
20386 ix86_expand_vector_extract (true, target, op0, elt);
20387
20388 return target;
20389 }
20390
20391 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20392 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20393 a language-level syntax for referencing vector elements. */
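/* Illustrative example (hypothetical user code):

       #include <emmintrin.h>
       __m128i put (__m128i v, int x) { return _mm_insert_epi16 (v, x, 5); }

   is expanded here; note that the input vector is first copied into a
   fresh register so the original operand is left unmodified.  */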
20394
20395 static rtx
20396 ix86_expand_vec_set_builtin (tree exp)
20397 {
20398 enum machine_mode tmode, mode1;
20399 tree arg0, arg1, arg2;
20400 int elt;
20401 rtx op0, op1, target;
20402
20403 arg0 = CALL_EXPR_ARG (exp, 0);
20404 arg1 = CALL_EXPR_ARG (exp, 1);
20405 arg2 = CALL_EXPR_ARG (exp, 2);
20406
20407 tmode = TYPE_MODE (TREE_TYPE (arg0));
20408 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20409 gcc_assert (VECTOR_MODE_P (tmode));
20410
20411 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20412 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20413 elt = get_element_number (TREE_TYPE (arg0), arg2);
20414
20415 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20416 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20417
20418 op0 = force_reg (tmode, op0);
20419 op1 = force_reg (mode1, op1);
20420
20421   /* OP0 is the source operand of these builtin functions and shouldn't be
20422      modified.  Create a copy, use it, and return the copy as the target.  */
20423 target = gen_reg_rtx (tmode);
20424 emit_move_insn (target, op0);
20425 ix86_expand_vector_set (true, target, op1, elt);
20426
20427 return target;
20428 }
20429
20430 /* Expand an expression EXP that calls a built-in function,
20431 with result going to TARGET if that's convenient
20432 (and in mode MODE if that's convenient).
20433 SUBTARGET may be used as the target for computing one of EXP's operands.
20434 IGNORE is nonzero if the value is to be ignored. */
20435
20436 static rtx
20437 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20438 enum machine_mode mode ATTRIBUTE_UNUSED,
20439 int ignore ATTRIBUTE_UNUSED)
20440 {
20441 const struct builtin_description *d;
20442 size_t i;
20443 enum insn_code icode;
20444 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20445 tree arg0, arg1, arg2, arg3;
20446 rtx op0, op1, op2, op3, pat;
20447 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20448 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20449
20450 switch (fcode)
20451 {
20452 case IX86_BUILTIN_EMMS:
20453 emit_insn (gen_mmx_emms ());
20454 return 0;
20455
20456 case IX86_BUILTIN_SFENCE:
20457 emit_insn (gen_sse_sfence ());
20458 return 0;
20459
20460 case IX86_BUILTIN_MASKMOVQ:
20461 case IX86_BUILTIN_MASKMOVDQU:
20462 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20463 ? CODE_FOR_mmx_maskmovq
20464 : CODE_FOR_sse2_maskmovdqu);
20465 /* Note the arg order is different from the operand order. */
20466 arg1 = CALL_EXPR_ARG (exp, 0);
20467 arg2 = CALL_EXPR_ARG (exp, 1);
20468 arg0 = CALL_EXPR_ARG (exp, 2);
20469 op0 = expand_normal (arg0);
20470 op1 = expand_normal (arg1);
20471 op2 = expand_normal (arg2);
20472 mode0 = insn_data[icode].operand[0].mode;
20473 mode1 = insn_data[icode].operand[1].mode;
20474 mode2 = insn_data[icode].operand[2].mode;
20475
20476 op0 = force_reg (Pmode, op0);
20477 op0 = gen_rtx_MEM (mode1, op0);
20478
20479 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20480 op0 = copy_to_mode_reg (mode0, op0);
20481 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20482 op1 = copy_to_mode_reg (mode1, op1);
20483 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20484 op2 = copy_to_mode_reg (mode2, op2);
20485 pat = GEN_FCN (icode) (op0, op1, op2);
20486 if (! pat)
20487 return 0;
20488 emit_insn (pat);
20489 return 0;
20490
20491 case IX86_BUILTIN_RSQRTF:
20492 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20493
20494 case IX86_BUILTIN_SQRTSS:
20495 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20496 case IX86_BUILTIN_RSQRTSS:
20497 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20498 case IX86_BUILTIN_RCPSS:
20499 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20500
20501 case IX86_BUILTIN_LOADUPS:
20502 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20503
20504 case IX86_BUILTIN_STOREUPS:
20505 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20506
20507 case IX86_BUILTIN_LOADHPS:
20508 case IX86_BUILTIN_LOADLPS:
20509 case IX86_BUILTIN_LOADHPD:
20510 case IX86_BUILTIN_LOADLPD:
20511 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20512 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20513 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20514 : CODE_FOR_sse2_loadlpd);
20515 arg0 = CALL_EXPR_ARG (exp, 0);
20516 arg1 = CALL_EXPR_ARG (exp, 1);
20517 op0 = expand_normal (arg0);
20518 op1 = expand_normal (arg1);
20519 tmode = insn_data[icode].operand[0].mode;
20520 mode0 = insn_data[icode].operand[1].mode;
20521 mode1 = insn_data[icode].operand[2].mode;
20522
20523 op0 = force_reg (mode0, op0);
20524 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20525 if (optimize || target == 0
20526 || GET_MODE (target) != tmode
20527 || !register_operand (target, tmode))
20528 target = gen_reg_rtx (tmode);
20529 pat = GEN_FCN (icode) (target, op0, op1);
20530 if (! pat)
20531 return 0;
20532 emit_insn (pat);
20533 return target;
20534
20535 case IX86_BUILTIN_STOREHPS:
20536 case IX86_BUILTIN_STORELPS:
20537 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20538 : CODE_FOR_sse_storelps);
20539 arg0 = CALL_EXPR_ARG (exp, 0);
20540 arg1 = CALL_EXPR_ARG (exp, 1);
20541 op0 = expand_normal (arg0);
20542 op1 = expand_normal (arg1);
20543 mode0 = insn_data[icode].operand[0].mode;
20544 mode1 = insn_data[icode].operand[1].mode;
20545
20546 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20547 op1 = force_reg (mode1, op1);
20548
20549 pat = GEN_FCN (icode) (op0, op1);
20550 if (! pat)
20551 return 0;
20552 emit_insn (pat);
20553 return const0_rtx;
20554
20555 case IX86_BUILTIN_MOVNTPS:
20556 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20557 case IX86_BUILTIN_MOVNTQ:
20558 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20559
20560 case IX86_BUILTIN_LDMXCSR:
20561 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20562 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20563 emit_move_insn (target, op0);
20564 emit_insn (gen_sse_ldmxcsr (target));
20565 return 0;
20566
20567 case IX86_BUILTIN_STMXCSR:
20568 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20569 emit_insn (gen_sse_stmxcsr (target));
20570 return copy_to_mode_reg (SImode, target);
20571
20572 case IX86_BUILTIN_SHUFPS:
20573 case IX86_BUILTIN_SHUFPD:
20574 icode = (fcode == IX86_BUILTIN_SHUFPS
20575 ? CODE_FOR_sse_shufps
20576 : CODE_FOR_sse2_shufpd);
20577 arg0 = CALL_EXPR_ARG (exp, 0);
20578 arg1 = CALL_EXPR_ARG (exp, 1);
20579 arg2 = CALL_EXPR_ARG (exp, 2);
20580 op0 = expand_normal (arg0);
20581 op1 = expand_normal (arg1);
20582 op2 = expand_normal (arg2);
20583 tmode = insn_data[icode].operand[0].mode;
20584 mode0 = insn_data[icode].operand[1].mode;
20585 mode1 = insn_data[icode].operand[2].mode;
20586 mode2 = insn_data[icode].operand[3].mode;
20587
20588 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20589 op0 = copy_to_mode_reg (mode0, op0);
20590 if ((optimize && !register_operand (op1, mode1))
20591 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20592 op1 = copy_to_mode_reg (mode1, op1);
20593 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20594 {
20595 /* @@@ better error message */
20596 error ("mask must be an immediate");
20597 return gen_reg_rtx (tmode);
20598 }
20599 if (optimize || target == 0
20600 || GET_MODE (target) != tmode
20601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20602 target = gen_reg_rtx (tmode);
20603 pat = GEN_FCN (icode) (target, op0, op1, op2);
20604 if (! pat)
20605 return 0;
20606 emit_insn (pat);
20607 return target;
20608
20609 case IX86_BUILTIN_PSHUFW:
20610 case IX86_BUILTIN_PSHUFD:
20611 case IX86_BUILTIN_PSHUFHW:
20612 case IX86_BUILTIN_PSHUFLW:
20613 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20614 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20615 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20616 : CODE_FOR_mmx_pshufw);
20617 arg0 = CALL_EXPR_ARG (exp, 0);
20618 arg1 = CALL_EXPR_ARG (exp, 1);
20619 op0 = expand_normal (arg0);
20620 op1 = expand_normal (arg1);
20621 tmode = insn_data[icode].operand[0].mode;
20622 mode1 = insn_data[icode].operand[1].mode;
20623 mode2 = insn_data[icode].operand[2].mode;
20624
20625 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20626 op0 = copy_to_mode_reg (mode1, op0);
20627 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20628 {
20629 /* @@@ better error message */
20630 error ("mask must be an immediate");
20631 return const0_rtx;
20632 }
20633 if (target == 0
20634 || GET_MODE (target) != tmode
20635 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20636 target = gen_reg_rtx (tmode);
20637 pat = GEN_FCN (icode) (target, op0, op1);
20638 if (! pat)
20639 return 0;
20640 emit_insn (pat);
20641 return target;
20642
20643 case IX86_BUILTIN_PSLLWI128:
20644 icode = CODE_FOR_ashlv8hi3;
20645 goto do_pshifti;
20646 case IX86_BUILTIN_PSLLDI128:
20647 icode = CODE_FOR_ashlv4si3;
20648 goto do_pshifti;
20649 case IX86_BUILTIN_PSLLQI128:
20650 icode = CODE_FOR_ashlv2di3;
20651 goto do_pshifti;
20652 case IX86_BUILTIN_PSRAWI128:
20653 icode = CODE_FOR_ashrv8hi3;
20654 goto do_pshifti;
20655 case IX86_BUILTIN_PSRADI128:
20656 icode = CODE_FOR_ashrv4si3;
20657 goto do_pshifti;
20658 case IX86_BUILTIN_PSRLWI128:
20659 icode = CODE_FOR_lshrv8hi3;
20660 goto do_pshifti;
20661 case IX86_BUILTIN_PSRLDI128:
20662 icode = CODE_FOR_lshrv4si3;
20663 goto do_pshifti;
20664 case IX86_BUILTIN_PSRLQI128:
20665 icode = CODE_FOR_lshrv2di3;
20666 goto do_pshifti;
20667 do_pshifti:
20668 arg0 = CALL_EXPR_ARG (exp, 0);
20669 arg1 = CALL_EXPR_ARG (exp, 1);
20670 op0 = expand_normal (arg0);
20671 op1 = expand_normal (arg1);
20672
20673 if (!CONST_INT_P (op1))
20674 {
20675 error ("shift must be an immediate");
20676 return const0_rtx;
20677 }
20678 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
20679 op1 = GEN_INT (255);
20680
20681 tmode = insn_data[icode].operand[0].mode;
20682 mode1 = insn_data[icode].operand[1].mode;
20683 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20684 op0 = copy_to_reg (op0);
20685
20686 target = gen_reg_rtx (tmode);
20687 pat = GEN_FCN (icode) (target, op0, op1);
20688 if (!pat)
20689 return 0;
20690 emit_insn (pat);
20691 return target;
20692
20693 case IX86_BUILTIN_PSLLW128:
20694 icode = CODE_FOR_ashlv8hi3;
20695 goto do_pshift;
20696 case IX86_BUILTIN_PSLLD128:
20697 icode = CODE_FOR_ashlv4si3;
20698 goto do_pshift;
20699 case IX86_BUILTIN_PSLLQ128:
20700 icode = CODE_FOR_ashlv2di3;
20701 goto do_pshift;
20702 case IX86_BUILTIN_PSRAW128:
20703 icode = CODE_FOR_ashrv8hi3;
20704 goto do_pshift;
20705 case IX86_BUILTIN_PSRAD128:
20706 icode = CODE_FOR_ashrv4si3;
20707 goto do_pshift;
20708 case IX86_BUILTIN_PSRLW128:
20709 icode = CODE_FOR_lshrv8hi3;
20710 goto do_pshift;
20711 case IX86_BUILTIN_PSRLD128:
20712 icode = CODE_FOR_lshrv4si3;
20713 goto do_pshift;
20714 case IX86_BUILTIN_PSRLQ128:
20715 icode = CODE_FOR_lshrv2di3;
20716 goto do_pshift;
20717 do_pshift:
20718 arg0 = CALL_EXPR_ARG (exp, 0);
20719 arg1 = CALL_EXPR_ARG (exp, 1);
20720 op0 = expand_normal (arg0);
20721 op1 = expand_normal (arg1);
20722
20723 tmode = insn_data[icode].operand[0].mode;
20724 mode1 = insn_data[icode].operand[1].mode;
20725
20726 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20727 op0 = copy_to_reg (op0);
20728
20729 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20730 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20731 op1 = copy_to_reg (op1);
20732
20733 target = gen_reg_rtx (tmode);
20734 pat = GEN_FCN (icode) (target, op0, op1);
20735 if (!pat)
20736 return 0;
20737 emit_insn (pat);
20738 return target;
20739
20740 case IX86_BUILTIN_PSLLDQI128:
20741 case IX86_BUILTIN_PSRLDQI128:
20742 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20743 : CODE_FOR_sse2_lshrti3);
20744 arg0 = CALL_EXPR_ARG (exp, 0);
20745 arg1 = CALL_EXPR_ARG (exp, 1);
20746 op0 = expand_normal (arg0);
20747 op1 = expand_normal (arg1);
20748 tmode = insn_data[icode].operand[0].mode;
20749 mode1 = insn_data[icode].operand[1].mode;
20750 mode2 = insn_data[icode].operand[2].mode;
20751
20752 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20753 {
20754 op0 = copy_to_reg (op0);
20755 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20756 }
20757 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20758 {
20759 error ("shift must be an immediate");
20760 return const0_rtx;
20761 }
20762 target = gen_reg_rtx (V2DImode);
20763 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20764 op0, op1);
20765 if (! pat)
20766 return 0;
20767 emit_insn (pat);
20768 return target;
20769
20770 case IX86_BUILTIN_FEMMS:
20771 emit_insn (gen_mmx_femms ());
20772 return NULL_RTX;
20773
20774 case IX86_BUILTIN_PAVGUSB:
20775 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20776
20777 case IX86_BUILTIN_PF2ID:
20778 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20779
20780 case IX86_BUILTIN_PFACC:
20781 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20782
20783 case IX86_BUILTIN_PFADD:
20784 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20785
20786 case IX86_BUILTIN_PFCMPEQ:
20787 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20788
20789 case IX86_BUILTIN_PFCMPGE:
20790 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20791
20792 case IX86_BUILTIN_PFCMPGT:
20793 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20794
20795 case IX86_BUILTIN_PFMAX:
20796 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20797
20798 case IX86_BUILTIN_PFMIN:
20799 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20800
20801 case IX86_BUILTIN_PFMUL:
20802 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20803
20804 case IX86_BUILTIN_PFRCP:
20805 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20806
20807 case IX86_BUILTIN_PFRCPIT1:
20808 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20809
20810 case IX86_BUILTIN_PFRCPIT2:
20811 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20812
20813 case IX86_BUILTIN_PFRSQIT1:
20814 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20815
20816 case IX86_BUILTIN_PFRSQRT:
20817 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20818
20819 case IX86_BUILTIN_PFSUB:
20820 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20821
20822 case IX86_BUILTIN_PFSUBR:
20823 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20824
20825 case IX86_BUILTIN_PI2FD:
20826 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20827
20828 case IX86_BUILTIN_PMULHRW:
20829 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20830
20831 case IX86_BUILTIN_PF2IW:
20832 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20833
20834 case IX86_BUILTIN_PFNACC:
20835 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20836
20837 case IX86_BUILTIN_PFPNACC:
20838 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20839
20840 case IX86_BUILTIN_PI2FW:
20841 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20842
20843 case IX86_BUILTIN_PSWAPDSI:
20844 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20845
20846 case IX86_BUILTIN_PSWAPDSF:
20847 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20848
20849 case IX86_BUILTIN_SQRTSD:
20850 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20851 case IX86_BUILTIN_LOADUPD:
20852 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20853 case IX86_BUILTIN_STOREUPD:
20854 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20855
20856 case IX86_BUILTIN_MFENCE:
20857 emit_insn (gen_sse2_mfence ());
20858 return 0;
20859 case IX86_BUILTIN_LFENCE:
20860 emit_insn (gen_sse2_lfence ());
20861 return 0;
20862
20863 case IX86_BUILTIN_CLFLUSH:
20864 arg0 = CALL_EXPR_ARG (exp, 0);
20865 op0 = expand_normal (arg0);
20866 icode = CODE_FOR_sse2_clflush;
20867 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20868 op0 = copy_to_mode_reg (Pmode, op0);
20869
20870 emit_insn (gen_sse2_clflush (op0));
20871 return 0;
20872
20873 case IX86_BUILTIN_MOVNTPD:
20874 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20875 case IX86_BUILTIN_MOVNTDQ:
20876 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20877 case IX86_BUILTIN_MOVNTI:
20878 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20879
20880 case IX86_BUILTIN_LOADDQU:
20881 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20882 case IX86_BUILTIN_STOREDQU:
20883 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20884
20885 case IX86_BUILTIN_MONITOR:
20886 arg0 = CALL_EXPR_ARG (exp, 0);
20887 arg1 = CALL_EXPR_ARG (exp, 1);
20888 arg2 = CALL_EXPR_ARG (exp, 2);
20889 op0 = expand_normal (arg0);
20890 op1 = expand_normal (arg1);
20891 op2 = expand_normal (arg2);
20892 if (!REG_P (op0))
20893 op0 = copy_to_mode_reg (Pmode, op0);
20894 if (!REG_P (op1))
20895 op1 = copy_to_mode_reg (SImode, op1);
20896 if (!REG_P (op2))
20897 op2 = copy_to_mode_reg (SImode, op2);
20898 if (!TARGET_64BIT)
20899 emit_insn (gen_sse3_monitor (op0, op1, op2));
20900 else
20901 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
20902 return 0;
20903
20904 case IX86_BUILTIN_MWAIT:
20905 arg0 = CALL_EXPR_ARG (exp, 0);
20906 arg1 = CALL_EXPR_ARG (exp, 1);
20907 op0 = expand_normal (arg0);
20908 op1 = expand_normal (arg1);
20909 if (!REG_P (op0))
20910 op0 = copy_to_mode_reg (SImode, op0);
20911 if (!REG_P (op1))
20912 op1 = copy_to_mode_reg (SImode, op1);
20913 emit_insn (gen_sse3_mwait (op0, op1));
20914 return 0;
20915
20916 case IX86_BUILTIN_LDDQU:
20917 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
20918 target, 1);
20919
20920 case IX86_BUILTIN_PALIGNR:
20921 case IX86_BUILTIN_PALIGNR128:
20922 if (fcode == IX86_BUILTIN_PALIGNR)
20923 {
20924 icode = CODE_FOR_ssse3_palignrdi;
20925 mode = DImode;
20926 }
20927 else
20928 {
20929 icode = CODE_FOR_ssse3_palignrti;
20930 mode = V2DImode;
20931 }
20932 arg0 = CALL_EXPR_ARG (exp, 0);
20933 arg1 = CALL_EXPR_ARG (exp, 1);
20934 arg2 = CALL_EXPR_ARG (exp, 2);
20935 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
20936 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
20937 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
20938 tmode = insn_data[icode].operand[0].mode;
20939 mode1 = insn_data[icode].operand[1].mode;
20940 mode2 = insn_data[icode].operand[2].mode;
20941 mode3 = insn_data[icode].operand[3].mode;
20942
20943 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20944 {
20945 op0 = copy_to_reg (op0);
20946 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20947 }
20948 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20949 {
20950 op1 = copy_to_reg (op1);
20951 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
20952 }
20953 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
20954 {
20955 error ("shift must be an immediate");
20956 return const0_rtx;
20957 }
20958 target = gen_reg_rtx (mode);
20959 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
20960 op0, op1, op2);
20961 if (! pat)
20962 return 0;
20963 emit_insn (pat);
20964 return target;
20965
20966 case IX86_BUILTIN_MOVNTDQA:
20967 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
20968 target, 1);
20969
20970 case IX86_BUILTIN_MOVNTSD:
20971 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
20972
20973 case IX86_BUILTIN_MOVNTSS:
20974 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
20975
20976 case IX86_BUILTIN_INSERTQ:
20977 case IX86_BUILTIN_EXTRQ:
20978 icode = (fcode == IX86_BUILTIN_EXTRQ
20979 ? CODE_FOR_sse4a_extrq
20980 : CODE_FOR_sse4a_insertq);
20981 arg0 = CALL_EXPR_ARG (exp, 0);
20982 arg1 = CALL_EXPR_ARG (exp, 1);
20983 op0 = expand_normal (arg0);
20984 op1 = expand_normal (arg1);
20985 tmode = insn_data[icode].operand[0].mode;
20986 mode1 = insn_data[icode].operand[1].mode;
20987 mode2 = insn_data[icode].operand[2].mode;
20988 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20989 op0 = copy_to_mode_reg (mode1, op0);
20990 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20991 op1 = copy_to_mode_reg (mode2, op1);
20992 if (optimize || target == 0
20993 || GET_MODE (target) != tmode
20994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20995 target = gen_reg_rtx (tmode);
20996 pat = GEN_FCN (icode) (target, op0, op1);
20997 if (! pat)
20998 return NULL_RTX;
20999 emit_insn (pat);
21000 return target;
21001
21002 case IX86_BUILTIN_EXTRQI:
21003 icode = CODE_FOR_sse4a_extrqi;
21004 arg0 = CALL_EXPR_ARG (exp, 0);
21005 arg1 = CALL_EXPR_ARG (exp, 1);
21006 arg2 = CALL_EXPR_ARG (exp, 2);
21007 op0 = expand_normal (arg0);
21008 op1 = expand_normal (arg1);
21009 op2 = expand_normal (arg2);
21010 tmode = insn_data[icode].operand[0].mode;
21011 mode1 = insn_data[icode].operand[1].mode;
21012 mode2 = insn_data[icode].operand[2].mode;
21013 mode3 = insn_data[icode].operand[3].mode;
21014 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21015 op0 = copy_to_mode_reg (mode1, op0);
21016 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21017 {
21018 error ("index mask must be an immediate");
21019 return gen_reg_rtx (tmode);
21020 }
21021 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21022 {
21023 error ("length mask must be an immediate");
21024 return gen_reg_rtx (tmode);
21025 }
21026 if (optimize || target == 0
21027 || GET_MODE (target) != tmode
21028 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21029 target = gen_reg_rtx (tmode);
21030 pat = GEN_FCN (icode) (target, op0, op1, op2);
21031 if (! pat)
21032 return NULL_RTX;
21033 emit_insn (pat);
21034 return target;
21035
21036 case IX86_BUILTIN_INSERTQI:
21037 icode = CODE_FOR_sse4a_insertqi;
21038 arg0 = CALL_EXPR_ARG (exp, 0);
21039 arg1 = CALL_EXPR_ARG (exp, 1);
21040 arg2 = CALL_EXPR_ARG (exp, 2);
21041 arg3 = CALL_EXPR_ARG (exp, 3);
21042 op0 = expand_normal (arg0);
21043 op1 = expand_normal (arg1);
21044 op2 = expand_normal (arg2);
21045 op3 = expand_normal (arg3);
21046 tmode = insn_data[icode].operand[0].mode;
21047 mode1 = insn_data[icode].operand[1].mode;
21048 mode2 = insn_data[icode].operand[2].mode;
21049 mode3 = insn_data[icode].operand[3].mode;
21050 mode4 = insn_data[icode].operand[4].mode;
21051
21052 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21053 op0 = copy_to_mode_reg (mode1, op0);
21054
21055 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21056 op1 = copy_to_mode_reg (mode2, op1);
21057
21058 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21059 {
21060 error ("index mask must be an immediate");
21061 return gen_reg_rtx (tmode);
21062 }
21063 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21064 {
21065 error ("length mask must be an immediate");
21066 return gen_reg_rtx (tmode);
21067 }
21068 if (optimize || target == 0
21069 || GET_MODE (target) != tmode
21070 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21071 target = gen_reg_rtx (tmode);
21072 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21073 if (! pat)
21074 return NULL_RTX;
21075 emit_insn (pat);
21076 return target;
21077
21078 case IX86_BUILTIN_VEC_INIT_V2SI:
21079 case IX86_BUILTIN_VEC_INIT_V4HI:
21080 case IX86_BUILTIN_VEC_INIT_V8QI:
21081 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21082
21083 case IX86_BUILTIN_VEC_EXT_V2DF:
21084 case IX86_BUILTIN_VEC_EXT_V2DI:
21085 case IX86_BUILTIN_VEC_EXT_V4SF:
21086 case IX86_BUILTIN_VEC_EXT_V4SI:
21087 case IX86_BUILTIN_VEC_EXT_V8HI:
21088 case IX86_BUILTIN_VEC_EXT_V2SI:
21089 case IX86_BUILTIN_VEC_EXT_V4HI:
21090 case IX86_BUILTIN_VEC_EXT_V16QI:
21091 return ix86_expand_vec_ext_builtin (exp, target);
21092
21093 case IX86_BUILTIN_VEC_SET_V2DI:
21094 case IX86_BUILTIN_VEC_SET_V4SF:
21095 case IX86_BUILTIN_VEC_SET_V4SI:
21096 case IX86_BUILTIN_VEC_SET_V8HI:
21097 case IX86_BUILTIN_VEC_SET_V4HI:
21098 case IX86_BUILTIN_VEC_SET_V16QI:
21099 return ix86_expand_vec_set_builtin (exp);
21100
21101 case IX86_BUILTIN_INFQ:
21102 {
21103 REAL_VALUE_TYPE inf;
21104 rtx tmp;
21105
21106 real_inf (&inf);
21107 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21108
21109 tmp = validize_mem (force_const_mem (mode, tmp));
21110
21111 if (target == 0)
21112 target = gen_reg_rtx (mode);
21113
21114 emit_move_insn (target, tmp);
21115 return target;
21116 }
21117
21118 case IX86_BUILTIN_FABSQ:
21119 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21120
21121 case IX86_BUILTIN_COPYSIGNQ:
21122 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21123
21124 default:
21125 break;
21126 }
21127
21128 for (i = 0, d = bdesc_sse_3arg;
21129 i < ARRAY_SIZE (bdesc_sse_3arg);
21130 i++, d++)
21131 if (d->code == fcode)
21132 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21133 target);
21134
21135 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21136 if (d->code == fcode)
21137 {
21138 /* Compares are treated specially. */
21139 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21140 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21141 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21142 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21143 return ix86_expand_sse_compare (d, exp, target);
21144
21145 return ix86_expand_binop_builtin (d->icode, exp, target);
21146 }
21147
21148 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21149 if (d->code == fcode)
21150 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21151
21152 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21153 if (d->code == fcode)
21154 return ix86_expand_sse_comi (d, exp, target);
21155
21156 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21157 if (d->code == fcode)
21158 return ix86_expand_sse_ptest (d, exp, target);
21159
21160 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21161 if (d->code == fcode)
21162 return ix86_expand_crc32 (d->icode, exp, target);
21163
21164 for (i = 0, d = bdesc_pcmpestr;
21165 i < ARRAY_SIZE (bdesc_pcmpestr);
21166 i++, d++)
21167 if (d->code == fcode)
21168 return ix86_expand_sse_pcmpestr (d, exp, target);
21169
21170 for (i = 0, d = bdesc_pcmpistr;
21171 i < ARRAY_SIZE (bdesc_pcmpistr);
21172 i++, d++)
21173 if (d->code == fcode)
21174 return ix86_expand_sse_pcmpistr (d, exp, target);
21175
21176 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21177 if (d->code == fcode)
21178 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21179 (enum multi_arg_type)d->flag,
21180 d->comparison);
21181
21182 gcc_unreachable ();
21183 }
21184
21185 /* Returns a function decl for a vectorized version of the builtin function
21186    with builtin function code FN, the result vector type TYPE_OUT and the
21187    argument vector type TYPE_IN, or NULL_TREE if it is not available.  */
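/* Illustrative source that exercises this hook (hypothetical example,
   assuming options that let the loop vectorizer replace the call, e.g.
   -O3 -ffast-math with SSE2 enabled):

       #include <math.h>
       double a[1024], b[1024];
       void f (void) { int i; for (i = 0; i < 1024; i++) a[i] = sqrt (b[i]); }

   The vectorizer asks for BUILT_IN_SQRT with a V2DF result and argument
   type, and receives the decl recorded for IX86_BUILTIN_SQRTPD.  */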
21188
21189 static tree
21190 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21191 tree type_in)
21192 {
21193 enum machine_mode in_mode, out_mode;
21194 int in_n, out_n;
21195
21196 if (TREE_CODE (type_out) != VECTOR_TYPE
21197 || TREE_CODE (type_in) != VECTOR_TYPE)
21198 return NULL_TREE;
21199
21200 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21201 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21202 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21203 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21204
21205 switch (fn)
21206 {
21207 case BUILT_IN_SQRT:
21208 if (out_mode == DFmode && out_n == 2
21209 && in_mode == DFmode && in_n == 2)
21210 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21211 break;
21212
21213 case BUILT_IN_SQRTF:
21214 if (out_mode == SFmode && out_n == 4
21215 && in_mode == SFmode && in_n == 4)
21216 return ix86_builtins[IX86_BUILTIN_SQRTPS];
21217 break;
21218
21219 case BUILT_IN_LRINT:
21220 if (out_mode == SImode && out_n == 4
21221 && in_mode == DFmode && in_n == 2)
21222 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21223 break;
21224
21225 case BUILT_IN_LRINTF:
21226 if (out_mode == SImode && out_n == 4
21227 && in_mode == SFmode && in_n == 4)
21228 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21229 break;
21230
21231 default:
21232 ;
21233 }
21234
21235 /* Dispatch to a handler for a vectorization library. */
21236 if (ix86_veclib_handler)
21237 return (*ix86_veclib_handler)(fn, type_out, type_in);
21238
21239 return NULL_TREE;
21240 }
21241
21242 /* Handler for an ACML-style interface to a library with vectorized
21243 intrinsics. */
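/* A sketch of the mangling done below: for BUILT_IN_SIN vectorized over
   V2DFmode the template "__vr.._" becomes "__vrd2_", the tail of
   "__builtin_sin" is appended, and the resulting decl names the ACML
   routine __vrd2_sin; the SFmode/4-lane case yields names like
   __vrs4_sinf.  */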
21244
21245 static tree
21246 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21247 {
21248 char name[20] = "__vr.._";
21249 tree fntype, new_fndecl, args;
21250 unsigned arity;
21251 const char *bname;
21252 enum machine_mode el_mode, in_mode;
21253 int n, in_n;
21254
21255   /* The ACML is 64-bit only and suitable only for unsafe math, as it
21256      does not correctly support parts of IEEE arithmetic, such as
21257      denormals, to the required precision.  */
21258 if (!TARGET_64BIT
21259 || !flag_unsafe_math_optimizations)
21260 return NULL_TREE;
21261
21262 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21263 n = TYPE_VECTOR_SUBPARTS (type_out);
21264 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21265 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21266 if (el_mode != in_mode
21267 || n != in_n)
21268 return NULL_TREE;
21269
21270 switch (fn)
21271 {
21272 case BUILT_IN_SIN:
21273 case BUILT_IN_COS:
21274 case BUILT_IN_EXP:
21275 case BUILT_IN_LOG:
21276 case BUILT_IN_LOG2:
21277 case BUILT_IN_LOG10:
21278 name[4] = 'd';
21279 name[5] = '2';
21280 if (el_mode != DFmode
21281 || n != 2)
21282 return NULL_TREE;
21283 break;
21284
21285 case BUILT_IN_SINF:
21286 case BUILT_IN_COSF:
21287 case BUILT_IN_EXPF:
21288 case BUILT_IN_POWF:
21289 case BUILT_IN_LOGF:
21290 case BUILT_IN_LOG2F:
21291 case BUILT_IN_LOG10F:
21292 name[4] = 's';
21293 name[5] = '4';
21294 if (el_mode != SFmode
21295 || n != 4)
21296 return NULL_TREE;
21297 break;
21298
21299 default:
21300 return NULL_TREE;
21301 }
21302
21303 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21304 sprintf (name + 7, "%s", bname+10);
21305
21306 arity = 0;
21307 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21308 args = TREE_CHAIN (args))
21309 arity++;
21310
21311 if (arity == 1)
21312 fntype = build_function_type_list (type_out, type_in, NULL);
21313 else
21314 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21315
21316 /* Build a function declaration for the vectorized function. */
21317 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21318 TREE_PUBLIC (new_fndecl) = 1;
21319 DECL_EXTERNAL (new_fndecl) = 1;
21320 DECL_IS_NOVOPS (new_fndecl) = 1;
21321 TREE_READONLY (new_fndecl) = 1;
21322
21323 return new_fndecl;
21324 }
21325
21326
21327 /* Returns a decl of a function that implements conversion of the
21328 input vector of type TYPE, or NULL_TREE if it is not available. */
21329
21330 static tree
21331 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21332 {
21333 if (TREE_CODE (type) != VECTOR_TYPE)
21334 return NULL_TREE;
21335
21336 switch (code)
21337 {
21338 case FLOAT_EXPR:
21339 switch (TYPE_MODE (type))
21340 {
21341 case V4SImode:
21342 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21343 default:
21344 return NULL_TREE;
21345 }
21346
21347 case FIX_TRUNC_EXPR:
21348 switch (TYPE_MODE (type))
21349 {
21350 case V4SFmode:
21351 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21352 default:
21353 return NULL_TREE;
21354 }
21355 default:
21356 return NULL_TREE;
21357
21358 }
21359 }
21360
21361 /* Returns the decl of a target-specific builtin that implements the
21362    reciprocal of function FN, or NULL_TREE if it is not available.  */
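/* Illustrative example (hypothetical user code, compiled with SSE math,
   -mrecip and the unsafe-math options checked below):

       #include <math.h>
       float rs (float x) { return 1.0f / sqrtf (x); }

   The middle end asks for a reciprocal of BUILT_IN_SQRTF and is handed
   IX86_BUILTIN_RSQRTF, so the divide/sqrt pair collapses into an rsqrtss
   based sequence (the Newton-Raphson refinement is emitted elsewhere).  */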
21363
21364 static tree
21365 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21366 bool sqrt ATTRIBUTE_UNUSED)
21367 {
21368 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21369 && flag_finite_math_only && !flag_trapping_math
21370 && flag_unsafe_math_optimizations))
21371 return NULL_TREE;
21372
21373 if (md_fn)
21374 /* Machine dependent builtins. */
21375 switch (fn)
21376 {
21377 /* Vectorized version of sqrt to rsqrt conversion. */
21378 case IX86_BUILTIN_SQRTPS:
21379 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
21380
21381 default:
21382 return NULL_TREE;
21383 }
21384 else
21385 /* Normal builtins. */
21386 switch (fn)
21387 {
21388 /* Sqrt to rsqrt conversion. */
21389 case BUILT_IN_SQRTF:
21390 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21391
21392 default:
21393 return NULL_TREE;
21394 }
21395 }
21396
21397 /* Store OPERAND to memory after reload is completed.  This means
21398    that we can't easily use assign_stack_local.  */
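/* For example, on 64-bit targets with the red zone available the operand
   is simply stored at SP - RED_ZONE_SIZE without adjusting the stack
   pointer, while the 32-bit path below pushes it (two SImode pushes for a
   DImode operand).  */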
21399 rtx
21400 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21401 {
21402 rtx result;
21403
21404 gcc_assert (reload_completed);
21405 if (TARGET_RED_ZONE)
21406 {
21407 result = gen_rtx_MEM (mode,
21408 gen_rtx_PLUS (Pmode,
21409 stack_pointer_rtx,
21410 GEN_INT (-RED_ZONE_SIZE)));
21411 emit_move_insn (result, operand);
21412 }
21413 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21414 {
21415 switch (mode)
21416 {
21417 case HImode:
21418 case SImode:
21419 operand = gen_lowpart (DImode, operand);
21420 /* FALLTHRU */
21421 case DImode:
21422 emit_insn (
21423 gen_rtx_SET (VOIDmode,
21424 gen_rtx_MEM (DImode,
21425 gen_rtx_PRE_DEC (DImode,
21426 stack_pointer_rtx)),
21427 operand));
21428 break;
21429 default:
21430 gcc_unreachable ();
21431 }
21432 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21433 }
21434 else
21435 {
21436 switch (mode)
21437 {
21438 case DImode:
21439 {
21440 rtx operands[2];
21441 split_di (&operand, 1, operands, operands + 1);
21442 emit_insn (
21443 gen_rtx_SET (VOIDmode,
21444 gen_rtx_MEM (SImode,
21445 gen_rtx_PRE_DEC (Pmode,
21446 stack_pointer_rtx)),
21447 operands[1]));
21448 emit_insn (
21449 gen_rtx_SET (VOIDmode,
21450 gen_rtx_MEM (SImode,
21451 gen_rtx_PRE_DEC (Pmode,
21452 stack_pointer_rtx)),
21453 operands[0]));
21454 }
21455 break;
21456 case HImode:
21457 /* Store HImodes as SImodes. */
21458 operand = gen_lowpart (SImode, operand);
21459 /* FALLTHRU */
21460 case SImode:
21461 emit_insn (
21462 gen_rtx_SET (VOIDmode,
21463 gen_rtx_MEM (GET_MODE (operand),
21464 gen_rtx_PRE_DEC (SImode,
21465 stack_pointer_rtx)),
21466 operand));
21467 break;
21468 default:
21469 gcc_unreachable ();
21470 }
21471 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21472 }
21473 return result;
21474 }
21475
21476 /* Free operand from the memory. */
21477 void
21478 ix86_free_from_memory (enum machine_mode mode)
21479 {
21480 if (!TARGET_RED_ZONE)
21481 {
21482 int size;
21483
21484 if (mode == DImode || TARGET_64BIT)
21485 size = 8;
21486 else
21487 size = 4;
21488       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
21489 	 to a pop or an add instruction if registers are available.  */
21490 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21491 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21492 GEN_INT (size))));
21493 }
21494 }
21495
21496 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21497 QImode must go into class Q_REGS.
21498 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21499 movdf to do mem-to-mem moves through integer regs. */
21500 enum reg_class
21501 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21502 {
21503 enum machine_mode mode = GET_MODE (x);
21504
21505   /* We're only allowed to return a subclass of REGCLASS.  Many of the
21506      following checks fail for NO_REGS, so eliminate that early.  */
21507 if (regclass == NO_REGS)
21508 return NO_REGS;
21509
21510 /* All classes can load zeros. */
21511 if (x == CONST0_RTX (mode))
21512 return regclass;
21513
21514 /* Force constants into memory if we are loading a (nonzero) constant into
21515 an MMX or SSE register. This is because there are no MMX/SSE instructions
21516 to load from a constant. */
21517 if (CONSTANT_P (x)
21518 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21519 return NO_REGS;
21520
21521 /* Prefer SSE regs only, if we can use them for math. */
21522 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21523 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21524
21525 /* Floating-point constants need more complex checks. */
21526 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21527 {
21528 /* General regs can load everything. */
21529 if (reg_class_subset_p (regclass, GENERAL_REGS))
21530 return regclass;
21531
21532 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21533 zero above. We only want to wind up preferring 80387 registers if
21534 we plan on doing computation with them. */
21535 if (TARGET_80387
21536 && standard_80387_constant_p (x))
21537 {
21538 /* Limit class to non-sse. */
21539 if (regclass == FLOAT_SSE_REGS)
21540 return FLOAT_REGS;
21541 if (regclass == FP_TOP_SSE_REGS)
21542 return FP_TOP_REG;
21543 if (regclass == FP_SECOND_SSE_REGS)
21544 return FP_SECOND_REG;
21545 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21546 return regclass;
21547 }
21548
21549 return NO_REGS;
21550 }
21551
21552 /* Generally when we see PLUS here, it's the function invariant
21553 (plus soft-fp const_int), which can only be computed into general
21554 regs. */
21555 if (GET_CODE (x) == PLUS)
21556 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21557
21558 /* QImode constants are easy to load, but non-constant QImode data
21559 must go into Q_REGS. */
21560 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21561 {
21562 if (reg_class_subset_p (regclass, Q_REGS))
21563 return regclass;
21564 if (reg_class_subset_p (Q_REGS, regclass))
21565 return Q_REGS;
21566 return NO_REGS;
21567 }
21568
21569 return regclass;
21570 }
21571
21572 /* Discourage putting floating-point values in SSE registers unless
21573 SSE math is being used, and likewise for the 387 registers. */
21574 enum reg_class
21575 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21576 {
21577 enum machine_mode mode = GET_MODE (x);
21578
21579 /* Restrict the output reload class to the register bank that we are doing
21580 math on. If we would like not to return a subset of CLASS, reject this
21581 alternative: if reload cannot do this, it will still use its choice. */
21582 mode = GET_MODE (x);
21583 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21584 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21585
21586 if (X87_FLOAT_MODE_P (mode))
21587 {
21588 if (regclass == FP_TOP_SSE_REGS)
21589 return FP_TOP_REG;
21590 else if (regclass == FP_SECOND_SSE_REGS)
21591 return FP_SECOND_REG;
21592 else
21593 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21594 }
21595
21596 return regclass;
21597 }
21598
21599 /* If we are copying between general and FP registers, we need a memory
21600 location. The same is true for SSE and MMX registers.
21601
21602 To optimize register_move_cost performance, allow inline variant.
21603
21604 The macro can't work reliably when one of the CLASSES is a class containing
21605 registers from multiple units (SSE, MMX, integer). We avoid this by never
21606 combining those units in a single alternative in the machine description.
21607 Ensure that this constraint holds to avoid surprises.
21608
21609 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21610 enforce these sanity checks. */
21611
21612 static inline int
21613 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21614 enum machine_mode mode, int strict)
21615 {
21616 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21617 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21618 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21619 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21620 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21621 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21622 {
21623 gcc_assert (!strict);
21624 return true;
21625 }
21626
21627 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21628 return true;
21629
21630 /* ??? This is a lie. We do have moves between mmx/general, and for
21631 mmx/sse2. But by saying we need secondary memory we discourage the
21632 register allocator from using the mmx registers unless needed. */
21633 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21634 return true;
21635
21636 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21637 {
21638 /* SSE1 doesn't have any direct moves from other classes. */
21639 if (!TARGET_SSE2)
21640 return true;
21641
21642 /* If the target says that inter-unit moves are more expensive
21643 than moving through memory, then don't generate them. */
21644 if (!TARGET_INTER_UNIT_MOVES)
21645 return true;
21646
21647 /* Between SSE and general, we have moves no larger than word size. */
21648 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21649 return true;
21650 }
21651
21652 return false;
21653 }
21654
21655 int
21656 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21657 enum machine_mode mode, int strict)
21658 {
21659 return inline_secondary_memory_needed (class1, class2, mode, strict);
21660 }
21661
21662 /* Return true if the registers in CLASS cannot represent the change from
21663 modes FROM to TO. */
21664
21665 bool
21666 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21667 enum reg_class regclass)
21668 {
21669 if (from == to)
21670 return false;
21671
21672 /* x87 registers can't do subreg at all, as all values are reformatted
21673 to extended precision. */
21674 if (MAYBE_FLOAT_CLASS_P (regclass))
21675 return true;
21676
21677 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21678 {
21679 /* Vector registers do not support QI or HImode loads. If we don't
21680 disallow a change to these modes, reload will assume it's ok to
21681 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21682 the vec_dupv4hi pattern. */
21683 if (GET_MODE_SIZE (from) < 4)
21684 return true;
21685
21686 /* Vector registers do not support subreg with nonzero offsets, which
21687 are otherwise valid for integer registers. Since we can't see
21688 whether we have a nonzero offset from here, prohibit all
21689 nonparadoxical subregs changing size. */
21690 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21691 return true;
21692 }
21693
21694 return false;
21695 }
21696
21697 /* Return the cost of moving data of mode M between a
21698 register and memory. A value of 2 is the default; this cost is
21699 relative to those in `REGISTER_MOVE_COST'.
21700
21701 This function is used extensively by register_move_cost, which is used to
21702 build tables at startup. Make it inline in this case.
21703 When IN is 2, return the maximum of the in and out move costs.
21704
21705 If moving between registers and memory is more expensive than
21706 between two registers, you should define this macro to express the
21707 relative cost.
21708
21709 Also model the increased cost of moving QImode registers in non-Q_REGS
21710 classes.
21711 */
21712 static inline int
21713 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21714 int in)
21715 {
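/* IN is 0 for a store, nonzero for a load; when IN is 2, return the
   maximum of the load and store costs. */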
21716 int cost;
21717 if (FLOAT_CLASS_P (regclass))
21718 {
21719 int index;
21720 switch (mode)
21721 {
21722 case SFmode:
21723 index = 0;
21724 break;
21725 case DFmode:
21726 index = 1;
21727 break;
21728 case XFmode:
21729 index = 2;
21730 break;
21731 default:
21732 return 100;
21733 }
21734 if (in == 2)
21735 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21736 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21737 }
21738 if (SSE_CLASS_P (regclass))
21739 {
21740 int index;
21741 switch (GET_MODE_SIZE (mode))
21742 {
21743 case 4:
21744 index = 0;
21745 break;
21746 case 8:
21747 index = 1;
21748 break;
21749 case 16:
21750 index = 2;
21751 break;
21752 default:
21753 return 100;
21754 }
21755 if (in == 2)
21756 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21757 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21758 }
21759 if (MMX_CLASS_P (regclass))
21760 {
21761 int index;
21762 switch (GET_MODE_SIZE (mode))
21763 {
21764 case 4:
21765 index = 0;
21766 break;
21767 case 8:
21768 index = 1;
21769 break;
21770 default:
21771 return 100;
21772 }
21773 if (in == 2)
21774 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21775 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21776 }
21777 switch (GET_MODE_SIZE (mode))
21778 {
21779 case 1:
21780 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21781 {
21782 if (!in)
21783 return ix86_cost->int_store[0];
21784 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21785 cost = ix86_cost->movzbl_load;
21786 else
21787 cost = ix86_cost->int_load[0];
21788 if (in == 2)
21789 return MAX (cost, ix86_cost->int_store[0]);
21790 return cost;
21791 }
21792 else
21793 {
21794 if (in == 2)
21795 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21796 if (in)
21797 return ix86_cost->movzbl_load;
21798 else
21799 return ix86_cost->int_store[0] + 4;
21800 }
21801 break;
21802 case 2:
21803 if (in == 2)
21804 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21805 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21806 default:
21807 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21808 if (mode == TFmode)
21809 mode = XFmode;
21810 if (in == 2)
21811 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21812 else if (in)
21813 cost = ix86_cost->int_load[2];
21814 else
21815 cost = ix86_cost->int_store[2];
21816 return (cost * (((int) GET_MODE_SIZE (mode)
21817 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21818 }
21819 }
21820
21821 int
21822 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21823 {
21824 return inline_memory_move_cost (mode, regclass, in);
21825 }
21826
21827
21828 /* Return the cost of moving data from a register in class CLASS1 to
21829 one in class CLASS2.
21830
21831 It is not required that the cost always equal 2 when FROM is the same as TO;
21832 on some machines it is expensive to move between registers if they are not
21833 general registers. */
21834
21835 int
21836 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21837 enum reg_class class2)
21838 {
21839 /* In case we require secondary memory, compute the cost of the store followed
21840 by the load. In order to avoid bad register allocation choices, we need
21841 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21842
21843 if (inline_secondary_memory_needed (class1, class2, mode, 0))
21844 {
21845 int cost = 1;
21846
21847 cost += inline_memory_move_cost (mode, class1, 2);
21848 cost += inline_memory_move_cost (mode, class2, 2);
21849
21850 /* In case of copying from a general purpose register we may emit multiple
21851 stores followed by a single load, causing a memory size mismatch stall.
21852 Count this as an arbitrarily high cost of 20. */
21853 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21854 cost += 20;
21855
21856 /* In the case of FP/MMX moves, the registers actually overlap, and we
21857 have to switch modes in order to treat them differently. */
21858 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21859 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21860 cost += 20;
21861
21862 return cost;
21863 }
21864
21865 /* Moves between SSE/MMX and integer unit are expensive. */
21866 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21867 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21868
21869 /* ??? By keeping the returned value relatively high, we limit the number
21870 of moves between integer and MMX/SSE registers for all targets.
21871 Additionally, a high value prevents problems with x86_modes_tieable_p(),
21872 where integer modes in MMX/SSE registers are not tieable
21873 because of missing QImode and HImode moves to, from or between
21874 MMX/SSE registers. */
21875 return MAX (ix86_cost->mmxsse_to_integer, 8);
21876
21877 if (MAYBE_FLOAT_CLASS_P (class1))
21878 return ix86_cost->fp_move;
21879 if (MAYBE_SSE_CLASS_P (class1))
21880 return ix86_cost->sse_move;
21881 if (MAYBE_MMX_CLASS_P (class1))
21882 return ix86_cost->mmx_move;
21883 return 2;
21884 }
21885
21886 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21887
21888 bool
21889 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21890 {
21891 /* Flags, and only flags, can hold CCmode values. */
21892 if (CC_REGNO_P (regno))
21893 return GET_MODE_CLASS (mode) == MODE_CC;
21894 if (GET_MODE_CLASS (mode) == MODE_CC
21895 || GET_MODE_CLASS (mode) == MODE_RANDOM
21896 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21897 return 0;
21898 if (FP_REGNO_P (regno))
21899 return VALID_FP_MODE_P (mode);
21900 if (SSE_REGNO_P (regno))
21901 {
21902 /* We implement the move patterns for all vector modes into and
21903 out of SSE registers, even when no operation instructions
21904 are available. */
21905 return (VALID_SSE_REG_MODE (mode)
21906 || VALID_SSE2_REG_MODE (mode)
21907 || VALID_MMX_REG_MODE (mode)
21908 || VALID_MMX_REG_MODE_3DNOW (mode));
21909 }
21910 if (MMX_REGNO_P (regno))
21911 {
21912 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21913 so if the register is available at all, then we can move data of
21914 the given mode into or out of it. */
21915 return (VALID_MMX_REG_MODE (mode)
21916 || VALID_MMX_REG_MODE_3DNOW (mode));
21917 }
21918
21919 if (mode == QImode)
21920 {
21921 /* Take care with QImode values - they can be in non-QI regs,
21922 but then they can cause partial register stalls. */
21923 if (regno < 4 || TARGET_64BIT)
21924 return 1;
21925 if (!TARGET_PARTIAL_REG_STALL)
21926 return 1;
21927 return reload_in_progress || reload_completed;
21928 }
21929 /* We handle both integers and floats in the general purpose registers. */
21930 else if (VALID_INT_MODE_P (mode))
21931 return 1;
21932 else if (VALID_FP_MODE_P (mode))
21933 return 1;
21934 else if (VALID_DFP_MODE_P (mode))
21935 return 1;
21936 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21937 on to use that value in smaller contexts, this can easily force a
21938 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21939 supporting DImode, allow it. */
21940 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21941 return 1;
21942
21943 return 0;
21944 }
21945
21946 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21947 tieable integer mode. */
21948
21949 static bool
21950 ix86_tieable_integer_mode_p (enum machine_mode mode)
21951 {
21952 switch (mode)
21953 {
21954 case HImode:
21955 case SImode:
21956 return true;
21957
21958 case QImode:
21959 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21960
21961 case DImode:
21962 return TARGET_64BIT;
21963
21964 default:
21965 return false;
21966 }
21967 }
21968
21969 /* Return true if MODE1 is accessible in a register that can hold MODE2
21970 without copying. That is, all register classes that can hold MODE2
21971 can also hold MODE1. */
21972
21973 bool
21974 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
21975 {
21976 if (mode1 == mode2)
21977 return true;
21978
21979 if (ix86_tieable_integer_mode_p (mode1)
21980 && ix86_tieable_integer_mode_p (mode2))
21981 return true;
21982
21983 /* MODE2 being XFmode implies fp stack or general regs, which means we
21984 can tie any smaller floating point modes to it. Note that we do not
21985 tie this with TFmode. */
21986 if (mode2 == XFmode)
21987 return mode1 == SFmode || mode1 == DFmode;
21988
21989 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21990 that we can tie it with SFmode. */
21991 if (mode2 == DFmode)
21992 return mode1 == SFmode;
21993
21994 /* If MODE2 is only appropriate for an SSE register, then tie with
21995 any other mode acceptable to SSE registers. */
21996 if (GET_MODE_SIZE (mode2) == 16
21997 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21998 return (GET_MODE_SIZE (mode1) == 16
21999 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22000
22001 /* If MODE2 is appropriate for an MMX register, then tie
22002 with any other mode acceptable to MMX registers. */
22003 if (GET_MODE_SIZE (mode2) == 8
22004 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22005 return (GET_MODE_SIZE (mode1) == 8
22006 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22007
22008 return false;
22009 }
22010
22011 /* Compute a (partial) cost for rtx X. Return true if the complete
22012 cost has been computed, and false if subexpressions should be
22013 scanned. In either case, *TOTAL contains the cost result. */
22014
22015 static bool
22016 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22017 {
22018 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22019 enum machine_mode mode = GET_MODE (x);
22020
22021 switch (code)
22022 {
22023 case CONST_INT:
22024 case CONST:
22025 case LABEL_REF:
22026 case SYMBOL_REF:
22027 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22028 *total = 3;
22029 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22030 *total = 2;
22031 else if (flag_pic && SYMBOLIC_CONST (x)
22032 && (!TARGET_64BIT
22033 || (GET_CODE (x) != LABEL_REF
22034 && (GET_CODE (x) != SYMBOL_REF
22035 || !SYMBOL_REF_LOCAL_P (x)))))
22036 *total = 1;
22037 else
22038 *total = 0;
22039 return true;
22040
22041 case CONST_DOUBLE:
22042 if (mode == VOIDmode)
22043 *total = 0;
22044 else
22045 switch (standard_80387_constant_p (x))
22046 {
22047 case 1: /* 0.0 */
22048 *total = 1;
22049 break;
22050 default: /* Other constants */
22051 *total = 2;
22052 break;
22053 case 0:
22054 case -1:
22055 /* Start with (MEM (SYMBOL_REF)), since that's where
22056 it'll probably end up. Add a penalty for size. */
22057 *total = (COSTS_N_INSNS (1)
22058 + (flag_pic != 0 && !TARGET_64BIT)
22059 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22060 break;
22061 }
22062 return true;
22063
22064 case ZERO_EXTEND:
22065 /* The zero extension is often completely free on x86_64, so make
22066 it as cheap as possible. */
22067 if (TARGET_64BIT && mode == DImode
22068 && GET_MODE (XEXP (x, 0)) == SImode)
22069 *total = 1;
22070 else if (TARGET_ZERO_EXTEND_WITH_AND)
22071 *total = ix86_cost->add;
22072 else
22073 *total = ix86_cost->movzx;
22074 return false;
22075
22076 case SIGN_EXTEND:
22077 *total = ix86_cost->movsx;
22078 return false;
22079
22080 case ASHIFT:
22081 if (CONST_INT_P (XEXP (x, 1))
22082 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22083 {
22084 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22085 if (value == 1)
22086 {
22087 *total = ix86_cost->add;
22088 return false;
22089 }
22090 if ((value == 2 || value == 3)
22091 && ix86_cost->lea <= ix86_cost->shift_const)
22092 {
22093 *total = ix86_cost->lea;
22094 return false;
22095 }
22096 }
22097 /* FALLTHRU */
22098
22099 case ROTATE:
22100 case ASHIFTRT:
22101 case LSHIFTRT:
22102 case ROTATERT:
22103 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22104 {
22105 if (CONST_INT_P (XEXP (x, 1)))
22106 {
22107 if (INTVAL (XEXP (x, 1)) > 32)
22108 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22109 else
22110 *total = ix86_cost->shift_const * 2;
22111 }
22112 else
22113 {
22114 if (GET_CODE (XEXP (x, 1)) == AND)
22115 *total = ix86_cost->shift_var * 2;
22116 else
22117 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22118 }
22119 }
22120 else
22121 {
22122 if (CONST_INT_P (XEXP (x, 1)))
22123 *total = ix86_cost->shift_const;
22124 else
22125 *total = ix86_cost->shift_var;
22126 }
22127 return false;
22128
22129 case MULT:
22130 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22131 {
22132 /* ??? SSE scalar cost should be used here. */
22133 *total = ix86_cost->fmul;
22134 return false;
22135 }
22136 else if (X87_FLOAT_MODE_P (mode))
22137 {
22138 *total = ix86_cost->fmul;
22139 return false;
22140 }
22141 else if (FLOAT_MODE_P (mode))
22142 {
22143 /* ??? SSE vector cost should be used here. */
22144 *total = ix86_cost->fmul;
22145 return false;
22146 }
22147 else
22148 {
22149 rtx op0 = XEXP (x, 0);
22150 rtx op1 = XEXP (x, 1);
22151 int nbits;
22152 if (CONST_INT_P (XEXP (x, 1)))
22153 {
22154 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22155 for (nbits = 0; value != 0; value &= value - 1)
22156 nbits++;
22157 }
22158 else
22159 /* This is arbitrary. */
22160 nbits = 7;
22161
22162 /* Compute costs correctly for widening multiplication. */
22163 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22164 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22165 == GET_MODE_SIZE (mode))
22166 {
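/* This is a widening multiply if both operands carry the same extension,
   or if the constant operand fits in the narrower inner mode. In that
   case, cost the multiply in the narrower mode. */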
22167 int is_mulwiden = 0;
22168 enum machine_mode inner_mode = GET_MODE (op0);
22169
22170 if (GET_CODE (op0) == GET_CODE (op1))
22171 is_mulwiden = 1, op1 = XEXP (op1, 0);
22172 else if (CONST_INT_P (op1))
22173 {
22174 if (GET_CODE (op0) == SIGN_EXTEND)
22175 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22176 == INTVAL (op1);
22177 else
22178 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22179 }
22180
22181 if (is_mulwiden)
22182 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22183 }
22184
22185 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22186 + nbits * ix86_cost->mult_bit
22187 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22188
22189 return true;
22190 }
22191
22192 case DIV:
22193 case UDIV:
22194 case MOD:
22195 case UMOD:
22196 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22197 /* ??? SSE cost should be used here. */
22198 *total = ix86_cost->fdiv;
22199 else if (X87_FLOAT_MODE_P (mode))
22200 *total = ix86_cost->fdiv;
22201 else if (FLOAT_MODE_P (mode))
22202 /* ??? SSE vector cost should be used here. */
22203 *total = ix86_cost->fdiv;
22204 else
22205 *total = ix86_cost->divide[MODE_INDEX (mode)];
22206 return false;
22207
22208 case PLUS:
22209 if (GET_MODE_CLASS (mode) == MODE_INT
22210 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22211 {
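/* Recognize address-style expressions (base + index * {2,4,8} + displacement)
   that a single lea instruction can compute. */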
22212 if (GET_CODE (XEXP (x, 0)) == PLUS
22213 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22214 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22215 && CONSTANT_P (XEXP (x, 1)))
22216 {
22217 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22218 if (val == 2 || val == 4 || val == 8)
22219 {
22220 *total = ix86_cost->lea;
22221 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22222 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22223 outer_code);
22224 *total += rtx_cost (XEXP (x, 1), outer_code);
22225 return true;
22226 }
22227 }
22228 else if (GET_CODE (XEXP (x, 0)) == MULT
22229 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22230 {
22231 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22232 if (val == 2 || val == 4 || val == 8)
22233 {
22234 *total = ix86_cost->lea;
22235 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22236 *total += rtx_cost (XEXP (x, 1), outer_code);
22237 return true;
22238 }
22239 }
22240 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22241 {
22242 *total = ix86_cost->lea;
22243 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22244 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22245 *total += rtx_cost (XEXP (x, 1), outer_code);
22246 return true;
22247 }
22248 }
22249 /* FALLTHRU */
22250
22251 case MINUS:
22252 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22253 {
22254 /* ??? SSE cost should be used here. */
22255 *total = ix86_cost->fadd;
22256 return false;
22257 }
22258 else if (X87_FLOAT_MODE_P (mode))
22259 {
22260 *total = ix86_cost->fadd;
22261 return false;
22262 }
22263 else if (FLOAT_MODE_P (mode))
22264 {
22265 /* ??? SSE vector cost should be used here. */
22266 *total = ix86_cost->fadd;
22267 return false;
22268 }
22269 /* FALLTHRU */
22270
22271 case AND:
22272 case IOR:
22273 case XOR:
22274 if (!TARGET_64BIT && mode == DImode)
22275 {
22276 *total = (ix86_cost->add * 2
22277 + (rtx_cost (XEXP (x, 0), outer_code)
22278 << (GET_MODE (XEXP (x, 0)) != DImode))
22279 + (rtx_cost (XEXP (x, 1), outer_code)
22280 << (GET_MODE (XEXP (x, 1)) != DImode)));
22281 return true;
22282 }
22283 /* FALLTHRU */
22284
22285 case NEG:
22286 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22287 {
22288 /* ??? SSE cost should be used here. */
22289 *total = ix86_cost->fchs;
22290 return false;
22291 }
22292 else if (X87_FLOAT_MODE_P (mode))
22293 {
22294 *total = ix86_cost->fchs;
22295 return false;
22296 }
22297 else if (FLOAT_MODE_P (mode))
22298 {
22299 /* ??? SSE vector cost should be used here. */
22300 *total = ix86_cost->fchs;
22301 return false;
22302 }
22303 /* FALLTHRU */
22304
22305 case NOT:
22306 if (!TARGET_64BIT && mode == DImode)
22307 *total = ix86_cost->add * 2;
22308 else
22309 *total = ix86_cost->add;
22310 return false;
22311
22312 case COMPARE:
22313 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22314 && XEXP (XEXP (x, 0), 1) == const1_rtx
22315 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22316 && XEXP (x, 1) == const0_rtx)
22317 {
22318 /* This kind of construct is implemented using test[bwl].
22319 Treat it as if we had an AND. */
22320 *total = (ix86_cost->add
22321 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22322 + rtx_cost (const1_rtx, outer_code));
22323 return true;
22324 }
22325 return false;
22326
22327 case FLOAT_EXTEND:
22328 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22329 *total = 0;
22330 return false;
22331
22332 case ABS:
22333 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22334 /* ??? SSE cost should be used here. */
22335 *total = ix86_cost->fabs;
22336 else if (X87_FLOAT_MODE_P (mode))
22337 *total = ix86_cost->fabs;
22338 else if (FLOAT_MODE_P (mode))
22339 /* ??? SSE vector cost should be used here. */
22340 *total = ix86_cost->fabs;
22341 return false;
22342
22343 case SQRT:
22344 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22345 /* ??? SSE cost should be used here. */
22346 *total = ix86_cost->fsqrt;
22347 else if (X87_FLOAT_MODE_P (mode))
22348 *total = ix86_cost->fsqrt;
22349 else if (FLOAT_MODE_P (mode))
22350 /* ??? SSE vector cost should be used here. */
22351 *total = ix86_cost->fsqrt;
22352 return false;
22353
22354 case UNSPEC:
22355 if (XINT (x, 1) == UNSPEC_TP)
22356 *total = 0;
22357 return false;
22358
22359 default:
22360 return false;
22361 }
22362 }
22363
22364 #if TARGET_MACHO
22365
22366 static int current_machopic_label_num;
22367
22368 /* Given a symbol name and its associated stub, write out the
22369 definition of the stub. */
22370
22371 void
22372 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22373 {
22374 unsigned int length;
22375 char *binder_name, *symbol_name, lazy_ptr_name[32];
22376 int label = ++current_machopic_label_num;
22377
22378 /* For 64-bit we shouldn't get here. */
22379 gcc_assert (!TARGET_64BIT);
22380
22381 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22382 symb = (*targetm.strip_name_encoding) (symb);
22383
22384 length = strlen (stub);
22385 binder_name = alloca (length + 32);
22386 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22387
22388 length = strlen (symb);
22389 symbol_name = alloca (length + 32);
22390 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22391
22392 sprintf (lazy_ptr_name, "L%d$lz", label);
22393
22394 if (MACHOPIC_PURE)
22395 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22396 else
22397 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22398
22399 fprintf (file, "%s:\n", stub);
22400 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22401
22402 if (MACHOPIC_PURE)
22403 {
22404 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22405 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22406 fprintf (file, "\tjmp\t*%%edx\n");
22407 }
22408 else
22409 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22410
22411 fprintf (file, "%s:\n", binder_name);
22412
22413 if (MACHOPIC_PURE)
22414 {
22415 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22416 fprintf (file, "\tpushl\t%%eax\n");
22417 }
22418 else
22419 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22420
22421 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22422
22423 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22424 fprintf (file, "%s:\n", lazy_ptr_name);
22425 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22426 fprintf (file, "\t.long %s\n", binder_name);
22427 }
22428
22429 void
22430 darwin_x86_file_end (void)
22431 {
22432 darwin_file_end ();
22433 ix86_file_end ();
22434 }
22435 #endif /* TARGET_MACHO */
22436
22437 /* Order the registers for register allocator. */
22438
22439 void
22440 x86_order_regs_for_local_alloc (void)
22441 {
22442 int pos = 0;
22443 int i;
22444
22445 /* First allocate the local general purpose registers. */
22446 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22447 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22448 reg_alloc_order [pos++] = i;
22449
22450 /* Global general purpose registers. */
22451 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22452 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22453 reg_alloc_order [pos++] = i;
22454
22455 /* x87 registers come first in case we are doing FP math
22456 using them. */
22457 if (!TARGET_SSE_MATH)
22458 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22459 reg_alloc_order [pos++] = i;
22460
22461 /* SSE registers. */
22462 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22463 reg_alloc_order [pos++] = i;
22464 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22465 reg_alloc_order [pos++] = i;
22466
22467 /* x87 registers. */
22468 if (TARGET_SSE_MATH)
22469 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22470 reg_alloc_order [pos++] = i;
22471
22472 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22473 reg_alloc_order [pos++] = i;
22474
22475 /* Initialize the rest of the array, as we do not allocate some registers
22476 at all. */
22477 while (pos < FIRST_PSEUDO_REGISTER)
22478 reg_alloc_order [pos++] = 0;
22479 }
22480
22481 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22482 struct attribute_spec.handler. */
22483 static tree
22484 ix86_handle_struct_attribute (tree *node, tree name,
22485 tree args ATTRIBUTE_UNUSED,
22486 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22487 {
22488 tree *type = NULL;
22489 if (DECL_P (*node))
22490 {
22491 if (TREE_CODE (*node) == TYPE_DECL)
22492 type = &TREE_TYPE (*node);
22493 }
22494 else
22495 type = node;
22496
22497 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22498 || TREE_CODE (*type) == UNION_TYPE)))
22499 {
22500 warning (OPT_Wattributes, "%qs attribute ignored",
22501 IDENTIFIER_POINTER (name));
22502 *no_add_attrs = true;
22503 }
22504
22505 else if ((is_attribute_p ("ms_struct", name)
22506 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22507 || ((is_attribute_p ("gcc_struct", name)
22508 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22509 {
22510 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22511 IDENTIFIER_POINTER (name));
22512 *no_add_attrs = true;
22513 }
22514
22515 return NULL_TREE;
22516 }
22517
22518 static bool
22519 ix86_ms_bitfield_layout_p (const_tree record_type)
22520 {
22521 return (TARGET_MS_BITFIELD_LAYOUT &&
22522 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22523 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22524 }
22525
22526 /* Returns an expression indicating where the this parameter is
22527 located on entry to the FUNCTION. */
22528
22529 static rtx
22530 x86_this_parameter (tree function)
22531 {
22532 tree type = TREE_TYPE (function);
22533 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22534
22535 if (TARGET_64BIT)
22536 {
22537 const int *parm_regs;
22538
22539 if (TARGET_64BIT_MS_ABI)
22540 parm_regs = x86_64_ms_abi_int_parameter_registers;
22541 else
22542 parm_regs = x86_64_int_parameter_registers;
22543 return gen_rtx_REG (DImode, parm_regs[aggr]);
22544 }
22545
22546 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22547 {
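/* With regparm, the this pointer arrives in a register: %eax by default,
   or %ecx for fastcall. */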
22548 int regno = 0;
22549 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22550 regno = 2;
22551 return gen_rtx_REG (SImode, regno);
22552 }
22553
22554 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22555 }
22556
22557 /* Determine whether x86_output_mi_thunk can succeed. */
22558
22559 static bool
22560 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22561 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22562 HOST_WIDE_INT vcall_offset, const_tree function)
22563 {
22564 /* 64-bit can handle anything. */
22565 if (TARGET_64BIT)
22566 return true;
22567
22568 /* For 32-bit, everything's fine if we have one free register. */
22569 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22570 return true;
22571
22572 /* Need a free register for vcall_offset. */
22573 if (vcall_offset)
22574 return false;
22575
22576 /* Need a free register for GOT references. */
22577 if (flag_pic && !(*targetm.binds_local_p) (function))
22578 return false;
22579
22580 /* Otherwise ok. */
22581 return true;
22582 }
22583
22584 /* Output the assembler code for a thunk function. THUNK_DECL is the
22585 declaration for the thunk function itself, FUNCTION is the decl for
22586 the target function. DELTA is an immediate constant offset to be
22587 added to THIS. If VCALL_OFFSET is nonzero, the word at
22588 *(*this + vcall_offset) should be added to THIS. */
22589
22590 static void
22591 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22592 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22593 HOST_WIDE_INT vcall_offset, tree function)
22594 {
22595 rtx xops[3];
22596 rtx this_param = x86_this_parameter (function);
22597 rtx this_reg, tmp;
22598
22599 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22600 pull it in now and let DELTA benefit. */
22601 if (REG_P (this_param))
22602 this_reg = this_param;
22603 else if (vcall_offset)
22604 {
22605 /* Put the this parameter into %eax. */
22606 xops[0] = this_param;
22607 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
22608 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22609 }
22610 else
22611 this_reg = NULL_RTX;
22612
22613 /* Adjust the this parameter by a fixed constant. */
22614 if (delta)
22615 {
22616 xops[0] = GEN_INT (delta);
22617 xops[1] = this_reg ? this_reg : this_param;
22618 if (TARGET_64BIT)
22619 {
22620 if (!x86_64_general_operand (xops[0], DImode))
22621 {
22622 tmp = gen_rtx_REG (DImode, R10_REG);
22623 xops[1] = tmp;
22624 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22625 xops[0] = tmp;
22626 xops[1] = this_param;
22627 }
22628 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22629 }
22630 else
22631 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22632 }
22633
22634 /* Adjust the this parameter by a value stored in the vtable. */
22635 if (vcall_offset)
22636 {
22637 if (TARGET_64BIT)
22638 tmp = gen_rtx_REG (DImode, R10_REG);
22639 else
22640 {
22641 int tmp_regno = 2 /* ECX */;
22642 if (lookup_attribute ("fastcall",
22643 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22644 tmp_regno = 0 /* EAX */;
22645 tmp = gen_rtx_REG (SImode, tmp_regno);
22646 }
22647
22648 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22649 xops[1] = tmp;
22650 if (TARGET_64BIT)
22651 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22652 else
22653 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22654
22655 /* Adjust the this parameter. */
22656 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22657 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22658 {
22659 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22660 xops[0] = GEN_INT (vcall_offset);
22661 xops[1] = tmp2;
22662 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22663 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22664 }
22665 xops[1] = this_reg;
22666 if (TARGET_64BIT)
22667 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22668 else
22669 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22670 }
22671
22672 /* If necessary, drop THIS back to its stack slot. */
22673 if (this_reg && this_reg != this_param)
22674 {
22675 xops[0] = this_reg;
22676 xops[1] = this_param;
22677 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22678 }
22679
22680 xops[0] = XEXP (DECL_RTL (function), 0);
22681 if (TARGET_64BIT)
22682 {
22683 if (!flag_pic || (*targetm.binds_local_p) (function))
22684 output_asm_insn ("jmp\t%P0", xops);
22685 /* All thunks should be in the same object as their target,
22686 and thus binds_local_p should be true. */
22687 else if (TARGET_64BIT_MS_ABI)
22688 gcc_unreachable ();
22689 else
22690 {
22691 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22692 tmp = gen_rtx_CONST (Pmode, tmp);
22693 tmp = gen_rtx_MEM (QImode, tmp);
22694 xops[0] = tmp;
22695 output_asm_insn ("jmp\t%A0", xops);
22696 }
22697 }
22698 else
22699 {
22700 if (!flag_pic || (*targetm.binds_local_p) (function))
22701 output_asm_insn ("jmp\t%P0", xops);
22702 else
22703 #if TARGET_MACHO
22704 if (TARGET_MACHO)
22705 {
22706 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22707 tmp = (gen_rtx_SYMBOL_REF
22708 (Pmode,
22709 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22710 tmp = gen_rtx_MEM (QImode, tmp);
22711 xops[0] = tmp;
22712 output_asm_insn ("jmp\t%0", xops);
22713 }
22714 else
22715 #endif /* TARGET_MACHO */
22716 {
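/* Load the GOT base into %ecx, fetch the target's address from its GOT
   slot, and jump through it. */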
22717 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
22718 output_set_got (tmp, NULL_RTX);
22719
22720 xops[1] = tmp;
22721 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22722 output_asm_insn ("jmp\t{*}%1", xops);
22723 }
22724 }
22725 }
22726
22727 static void
22728 x86_file_start (void)
22729 {
22730 default_file_start ();
22731 #if TARGET_MACHO
22732 darwin_file_start ();
22733 #endif
22734 if (X86_FILE_START_VERSION_DIRECTIVE)
22735 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22736 if (X86_FILE_START_FLTUSED)
22737 fputs ("\t.global\t__fltused\n", asm_out_file);
22738 if (ix86_asm_dialect == ASM_INTEL)
22739 fputs ("\t.intel_syntax\n", asm_out_file);
22740 }
22741
22742 int
22743 x86_field_alignment (tree field, int computed)
22744 {
22745 enum machine_mode mode;
22746 tree type = TREE_TYPE (field);
22747
22748 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22749 return computed;
22750 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22751 ? get_inner_array_type (type) : type);
22752 if (mode == DFmode || mode == DCmode
22753 || GET_MODE_CLASS (mode) == MODE_INT
22754 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22755 return MIN (32, computed);
22756 return computed;
22757 }
22758
22759 /* Output assembler code to FILE to increment profiler label # LABELNO
22760 for profiling a function entry. */
22761 void
22762 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22763 {
22764 if (TARGET_64BIT)
22765 {
22766 #ifndef NO_PROFILE_COUNTERS
22767 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22768 #endif
22769
22770 if (!TARGET_64BIT_MS_ABI && flag_pic)
22771 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22772 else
22773 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22774 }
22775 else if (flag_pic)
22776 {
22777 #ifndef NO_PROFILE_COUNTERS
22778 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22779 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22780 #endif
22781 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22782 }
22783 else
22784 {
22785 #ifndef NO_PROFILE_COUNTERS
22786 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22787 PROFILE_COUNT_REGISTER);
22788 #endif
22789 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22790 }
22791 }
22792
22793 /* We don't have exact information about the insn sizes, but we may assume
22794 quite safely that we are informed about all 1 byte insns and memory
22795 address sizes. This is enough to eliminate unnecessary padding in
22796 99% of cases. */
22797
22798 static int
22799 min_insn_size (rtx insn)
22800 {
22801 int l = 0;
22802
22803 if (!INSN_P (insn) || !active_insn_p (insn))
22804 return 0;
22805
22806 /* Discard alignments we've emitted and jump instructions. */
22807 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22808 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22809 return 0;
22810 if (JUMP_P (insn)
22811 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22812 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22813 return 0;
22814
22815 /* Important case - calls are always 5 bytes.
22816 It is common to have many calls in a row. */
22817 if (CALL_P (insn)
22818 && symbolic_reference_mentioned_p (PATTERN (insn))
22819 && !SIBLING_CALL_P (insn))
22820 return 5;
22821 if (get_attr_length (insn) <= 1)
22822 return 1;
22823
22824 /* For normal instructions we may rely on the sizes of addresses
22825 and the presence of a symbol to require 4 bytes of encoding.
22826 This is not the case for jumps, where references are PC relative. */
22827 if (!JUMP_P (insn))
22828 {
22829 l = get_attr_length_address (insn);
22830 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22831 l = 4;
22832 }
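/* One byte of opcode plus the address bytes; otherwise assume a two byte
   encoding. */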
22833 if (l)
22834 return 1+l;
22835 else
22836 return 2;
22837 }
22838
22839 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
22840 16 byte window. */
22841
22842 static void
22843 ix86_avoid_jump_misspredicts (void)
22844 {
22845 rtx insn, start = get_insns ();
22846 int nbytes = 0, njumps = 0;
22847 int isjump = 0;
22848
22849 /* Look for all minimal intervals of instructions containing 4 jumps.
22850 The intervals are bounded by START and INSN. NBYTES is the total
22851 size of instructions in the interval including INSN and not including
22852 START. When the NBYTES is smaller than 16 bytes, it is possible
22853 that the end of START and INSN ends up in the same 16byte page.
22854
22855 The smallest offset in the page at which INSN can start is the case where
22856 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
22857 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
22858 */
22859 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22860 {
22861
22862 nbytes += min_insn_size (insn);
22863 if (dump_file)
22864 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
22865 INSN_UID (insn), min_insn_size (insn));
22866 if ((JUMP_P (insn)
22867 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22868 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22869 || CALL_P (insn))
22870 njumps++;
22871 else
22872 continue;
22873
22874 while (njumps > 3)
22875 {
22876 start = NEXT_INSN (start);
22877 if ((JUMP_P (start)
22878 && GET_CODE (PATTERN (start)) != ADDR_VEC
22879 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22880 || CALL_P (start))
22881 njumps--, isjump = 1;
22882 else
22883 isjump = 0;
22884 nbytes -= min_insn_size (start);
22885 }
22886 gcc_assert (njumps >= 0);
22887 if (dump_file)
22888 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22889 INSN_UID (start), INSN_UID (insn), nbytes);
22890
22891 if (njumps == 3 && isjump && nbytes < 16)
22892 {
22893 int padsize = 15 - nbytes + min_insn_size (insn);
22894
22895 if (dump_file)
22896 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22897 INSN_UID (insn), padsize);
22898 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
22899 }
22900 }
22901 }
22902
22903 /* AMD Athlon works faster
22904 when RET is not the destination of a conditional jump or directly preceded
22905 by another jump instruction. We avoid the penalty by inserting a NOP just
22906 before the RET instructions in such cases. */
22907 static void
22908 ix86_pad_returns (void)
22909 {
22910 edge e;
22911 edge_iterator ei;
22912
22913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
22914 {
22915 basic_block bb = e->src;
22916 rtx ret = BB_END (bb);
22917 rtx prev;
22918 bool replace = false;
22919
22920 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
22921 || !maybe_hot_bb_p (bb))
22922 continue;
22923 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22924 if (active_insn_p (prev) || LABEL_P (prev))
22925 break;
22926 if (prev && LABEL_P (prev))
22927 {
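/* The return follows a label; replace it if any predecessor reaches the
   label by an explicit jump (a non-fallthru edge), since a jump straight
   to a return is what triggers the misprediction. */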
22928 edge e;
22929 edge_iterator ei;
22930
22931 FOR_EACH_EDGE (e, ei, bb->preds)
22932 if (EDGE_FREQUENCY (e) && e->src->index >= 0
22933 && !(e->flags & EDGE_FALLTHRU))
22934 replace = true;
22935 }
22936 if (!replace)
22937 {
22938 prev = prev_active_insn (ret);
22939 if (prev
22940 && ((JUMP_P (prev) && any_condjump_p (prev))
22941 || CALL_P (prev)))
22942 replace = true;
22943 /* Empty functions get a branch mispredict even when the jump destination
22944 is not visible to us. */
22945 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
22946 replace = true;
22947 }
22948 if (replace)
22949 {
22950 emit_insn_before (gen_return_internal_long (), ret);
22951 delete_insn (ret);
22952 }
22953 }
22954 }
22955
22956 /* Implement machine specific optimizations. We implement padding of returns
22957 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
22958 static void
22959 ix86_reorg (void)
22960 {
22961 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
22962 ix86_pad_returns ();
22963 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
22964 ix86_avoid_jump_misspredicts ();
22965 }
22966
22967 /* Return nonzero when a QImode register that must be represented via a REX
22968 prefix is used. */
22969 bool
22970 x86_extended_QIreg_mentioned_p (rtx insn)
22971 {
22972 int i;
22973 extract_insn_cached (insn);
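/* Hard registers 0-3 (%al, %dl, %cl, %bl) have legacy byte-register
   encodings; any other register accessed in QImode needs a REX prefix. */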
22974 for (i = 0; i < recog_data.n_operands; i++)
22975 if (REG_P (recog_data.operand[i])
22976 && REGNO (recog_data.operand[i]) >= 4)
22977 return true;
22978 return false;
22979 }
22980
22981 /* Return nonzero when P points to a register encoded via a REX prefix.
22982 Called via for_each_rtx. */
22983 static int
22984 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
22985 {
22986 unsigned int regno;
22987 if (!REG_P (*p))
22988 return 0;
22989 regno = REGNO (*p);
22990 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
22991 }
22992
22993 /* Return true when INSN mentions a register that must be encoded using a REX
22994 prefix. */
22995 bool
22996 x86_extended_reg_mentioned_p (rtx insn)
22997 {
22998 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
22999 }
23000
23001 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23002 optabs would emit if we didn't have TFmode patterns. */
23003
23004 void
23005 x86_emit_floatuns (rtx operands[2])
23006 {
23007 rtx neglab, donelab, i0, i1, f0, in, out;
23008 enum machine_mode mode, inmode;
23009
23010 inmode = GET_MODE (operands[1]);
23011 gcc_assert (inmode == SImode || inmode == DImode);
23012
23013 out = operands[0];
23014 in = force_reg (inmode, operands[1]);
23015 mode = GET_MODE (out);
23016 neglab = gen_label_rtx ();
23017 donelab = gen_label_rtx ();
23018 f0 = gen_reg_rtx (mode);
23019
23020 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23021
23022 expand_float (out, in, 0);
23023
23024 emit_jump_insn (gen_jump (donelab));
23025 emit_barrier ();
23026
23027 emit_label (neglab);
23028
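/* The input has the sign bit set, so it will not convert correctly as a
   signed value. Halve it, ORing the low bit back in so rounding stays
   correct, convert to FP, and then double the result. */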
23029 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23030 1, OPTAB_DIRECT);
23031 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23032 1, OPTAB_DIRECT);
23033 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23034
23035 expand_float (f0, i0, 0);
23036
23037 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23038
23039 emit_label (donelab);
23040 }
23041 \f
23042 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23043 with all elements equal to VAR. Return true if successful. */
23044
23045 static bool
23046 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23047 rtx target, rtx val)
23048 {
23049 enum machine_mode smode, wsmode, wvmode;
23050 rtx x;
23051
23052 switch (mode)
23053 {
23054 case V2SImode:
23055 case V2SFmode:
23056 if (!mmx_ok)
23057 return false;
23058 /* FALLTHRU */
23059
23060 case V2DFmode:
23061 case V2DImode:
23062 case V4SFmode:
23063 case V4SImode:
23064 val = force_reg (GET_MODE_INNER (mode), val);
23065 x = gen_rtx_VEC_DUPLICATE (mode, val);
23066 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23067 return true;
23068
23069 case V4HImode:
23070 if (!mmx_ok)
23071 return false;
23072 if (TARGET_SSE || TARGET_3DNOW_A)
23073 {
23074 val = gen_lowpart (SImode, val);
23075 x = gen_rtx_TRUNCATE (HImode, val);
23076 x = gen_rtx_VEC_DUPLICATE (mode, x);
23077 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23078 return true;
23079 }
23080 else
23081 {
23082 smode = HImode;
23083 wsmode = SImode;
23084 wvmode = V2SImode;
23085 goto widen;
23086 }
23087
23088 case V8QImode:
23089 if (!mmx_ok)
23090 return false;
23091 smode = QImode;
23092 wsmode = HImode;
23093 wvmode = V4HImode;
23094 goto widen;
23095 case V8HImode:
23096 if (TARGET_SSE2)
23097 {
23098 rtx tmp1, tmp2;
23099 /* Extend HImode to SImode using a paradoxical SUBREG. */
23100 tmp1 = gen_reg_rtx (SImode);
23101 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23102 /* Insert the SImode value as low element of V4SImode vector. */
23103 tmp2 = gen_reg_rtx (V4SImode);
23104 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23105 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23106 CONST0_RTX (V4SImode),
23107 const1_rtx);
23108 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23109 /* Cast the V4SImode vector back to a V8HImode vector. */
23110 tmp1 = gen_reg_rtx (V8HImode);
23111 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23112 /* Duplicate the low short through the whole low SImode word. */
23113 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23114 /* Cast the V8HImode vector back to a V4SImode vector. */
23115 tmp2 = gen_reg_rtx (V4SImode);
23116 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23117 /* Replicate the low element of the V4SImode vector. */
23118 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23119 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23120 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23121 return true;
23122 }
23123 smode = HImode;
23124 wsmode = SImode;
23125 wvmode = V4SImode;
23126 goto widen;
23127 case V16QImode:
23128 if (TARGET_SSE2)
23129 {
23130 rtx tmp1, tmp2;
23131 /* Extend QImode to SImode using a paradoxical SUBREG. */
23132 tmp1 = gen_reg_rtx (SImode);
23133 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23134 /* Insert the SImode value as low element of V4SImode vector. */
23135 tmp2 = gen_reg_rtx (V4SImode);
23136 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23137 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23138 CONST0_RTX (V4SImode),
23139 const1_rtx);
23140 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23141 /* Cast the V4SImode vector back to a V16QImode vector. */
23142 tmp1 = gen_reg_rtx (V16QImode);
23143 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23144 /* Duplicate the low byte through the whole low SImode word. */
23145 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23146 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23147 /* Cast the V16QImode vector back to a V4SImode vector. */
23148 tmp2 = gen_reg_rtx (V4SImode);
23149 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23150 /* Replicate the low element of the V4SImode vector. */
23151 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23152 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23153 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23154 return true;
23155 }
23156 smode = QImode;
23157 wsmode = HImode;
23158 wvmode = V8HImode;
23159 goto widen;
23160 widen:
23161 /* Replicate the value once into the next wider mode and recurse. */
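/* Build a wider scalar holding two copies of the value (VAL | VAL << bits)
   and duplicate that through the wider vector mode. */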
23162 val = convert_modes (wsmode, smode, val, true);
23163 x = expand_simple_binop (wsmode, ASHIFT, val,
23164 GEN_INT (GET_MODE_BITSIZE (smode)),
23165 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23166 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23167
23168 x = gen_reg_rtx (wvmode);
23169 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23170 gcc_unreachable ();
23171 emit_move_insn (target, gen_lowpart (mode, x));
23172 return true;
23173
23174 default:
23175 return false;
23176 }
23177 }
23178
23179 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23180 whose ONE_VAR element is VAR, and other elements are zero. Return true
23181 if successful. */
23182
23183 static bool
23184 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23185 rtx target, rtx var, int one_var)
23186 {
23187 enum machine_mode vsimode;
23188 rtx new_target;
23189 rtx x, tmp;
23190
23191 switch (mode)
23192 {
23193 case V2SFmode:
23194 case V2SImode:
23195 if (!mmx_ok)
23196 return false;
23197 /* FALLTHRU */
23198
23199 case V2DFmode:
23200 case V2DImode:
23201 if (one_var != 0)
23202 return false;
23203 var = force_reg (GET_MODE_INNER (mode), var);
23204 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23205 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23206 return true;
23207
23208 case V4SFmode:
23209 case V4SImode:
23210 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23211 new_target = gen_reg_rtx (mode);
23212 else
23213 new_target = target;
23214 var = force_reg (GET_MODE_INNER (mode), var);
23215 x = gen_rtx_VEC_DUPLICATE (mode, var);
23216 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23217 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23218 if (one_var != 0)
23219 {
23220 /* We need to shuffle the value to the correct position, so
23221 create a new pseudo to store the intermediate result. */
23222
23223 /* With SSE2, we can use the integer shuffle insns. */
23224 if (mode != V4SFmode && TARGET_SSE2)
23225 {
23226 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23227 GEN_INT (1),
23228 GEN_INT (one_var == 1 ? 0 : 1),
23229 GEN_INT (one_var == 2 ? 0 : 1),
23230 GEN_INT (one_var == 3 ? 0 : 1)));
23231 if (target != new_target)
23232 emit_move_insn (target, new_target);
23233 return true;
23234 }
23235
23236 /* Otherwise convert the intermediate result to V4SFmode and
23237 use the SSE1 shuffle instructions. */
23238 if (mode != V4SFmode)
23239 {
23240 tmp = gen_reg_rtx (V4SFmode);
23241 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23242 }
23243 else
23244 tmp = new_target;
23245
23246 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23247 GEN_INT (1),
23248 GEN_INT (one_var == 1 ? 0 : 1),
23249 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23250 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23251
23252 if (mode != V4SFmode)
23253 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23254 else if (tmp != target)
23255 emit_move_insn (target, tmp);
23256 }
23257 else if (target != new_target)
23258 emit_move_insn (target, new_target);
23259 return true;
23260
23261 case V8HImode:
23262 case V16QImode:
23263 vsimode = V4SImode;
23264 goto widen;
23265 case V4HImode:
23266 case V8QImode:
23267 if (!mmx_ok)
23268 return false;
23269 vsimode = V2SImode;
23270 goto widen;
23271 widen:
23272 if (one_var != 0)
23273 return false;
23274
23275 /* Zero extend the variable element to SImode and recurse. */
23276 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23277
23278 x = gen_reg_rtx (vsimode);
23279 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23280 var, one_var))
23281 gcc_unreachable ();
23282
23283 emit_move_insn (target, gen_lowpart (mode, x));
23284 return true;
23285
23286 default:
23287 return false;
23288 }
23289 }
23290
23291 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23292 consisting of the values in VALS. It is known that all elements
23293 except ONE_VAR are constants. Return true if successful. */
23294
23295 static bool
23296 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23297 rtx target, rtx vals, int one_var)
23298 {
23299 rtx var = XVECEXP (vals, 0, one_var);
23300 enum machine_mode wmode;
23301 rtx const_vec, x;
23302
23303 const_vec = copy_rtx (vals);
23304 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23305 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23306
23307 switch (mode)
23308 {
23309 case V2DFmode:
23310 case V2DImode:
23311 case V2SFmode:
23312 case V2SImode:
23313 /* For the two element vectors, it's just as easy to use
23314 the general case. */
23315 return false;
23316
23317 case V4SFmode:
23318 case V4SImode:
23319 case V8HImode:
23320 case V4HImode:
23321 break;
23322
23323 case V16QImode:
23324 wmode = V8HImode;
23325 goto widen;
23326 case V8QImode:
23327 wmode = V4HImode;
23328 goto widen;
23329 widen:
23330 /* There's no way to set one QImode entry easily. Combine
23331 the variable value with its adjacent constant value, and
23332 promote to an HImode set. */
23333 x = XVECEXP (vals, 0, one_var ^ 1);
23334 if (one_var & 1)
23335 {
23336 var = convert_modes (HImode, QImode, var, true);
23337 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23338 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23339 x = GEN_INT (INTVAL (x) & 0xff);
23340 }
23341 else
23342 {
23343 var = convert_modes (HImode, QImode, var, true);
23344 x = gen_int_mode (INTVAL (x) << 8, HImode);
23345 }
23346 if (x != const0_rtx)
23347 var = expand_simple_binop (HImode, IOR, var, x, var,
23348 1, OPTAB_LIB_WIDEN);
23349
23350 x = gen_reg_rtx (wmode);
23351 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23352 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23353
23354 emit_move_insn (target, gen_lowpart (mode, x));
23355 return true;
23356
23357 default:
23358 return false;
23359 }
23360
23361 emit_move_insn (target, const_vec);
23362 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23363 return true;
23364 }
23365
23366 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23367 all values variable, and none identical. */
23368
23369 static void
23370 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23371 rtx target, rtx vals)
23372 {
23373 enum machine_mode half_mode = GET_MODE_INNER (mode);
23374 rtx op0 = NULL, op1 = NULL;
23375 bool use_vec_concat = false;
23376
23377 switch (mode)
23378 {
23379 case V2SFmode:
23380 case V2SImode:
23381 if (!mmx_ok && !TARGET_SSE)
23382 break;
23383 /* FALLTHRU */
23384
23385 case V2DFmode:
23386 case V2DImode:
23387 /* For the two element vectors, we always implement VEC_CONCAT. */
23388 op0 = XVECEXP (vals, 0, 0);
23389 op1 = XVECEXP (vals, 0, 1);
23390 use_vec_concat = true;
23391 break;
23392
23393 case V4SFmode:
23394 half_mode = V2SFmode;
23395 goto half;
23396 case V4SImode:
23397 half_mode = V2SImode;
23398 goto half;
23399 half:
23400 {
23401 rtvec v;
23402
23403 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23404 Recurse to load the two halves. */
23405
23406 op0 = gen_reg_rtx (half_mode);
23407 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23408 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23409
23410 op1 = gen_reg_rtx (half_mode);
23411 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23412 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23413
23414 use_vec_concat = true;
23415 }
23416 break;
23417
23418 case V8HImode:
23419 case V16QImode:
23420 case V4HImode:
23421 case V8QImode:
23422 break;
23423
23424 default:
23425 gcc_unreachable ();
23426 }
23427
23428 if (use_vec_concat)
23429 {
23430 if (!register_operand (op0, half_mode))
23431 op0 = force_reg (half_mode, op0);
23432 if (!register_operand (op1, half_mode))
23433 op1 = force_reg (half_mode, op1);
23434
23435 emit_insn (gen_rtx_SET (VOIDmode, target,
23436 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23437 }
23438 else
23439 {
23440 int i, j, n_elts, n_words, n_elt_per_word;
23441 enum machine_mode inner_mode;
23442 rtx words[4], shift;
23443
23444 inner_mode = GET_MODE_INNER (mode);
23445 n_elts = GET_MODE_NUNITS (mode);
23446 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23447 n_elt_per_word = n_elts / n_words;
23448 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23449
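      /* Build each word from its elements, most significant element of the
	 word first, so that element i*n_elt_per_word lands in the low bits,
	 matching the little-endian layout of the vector in memory.  As an
	 illustration, for V8HImode on a 32-bit target this computes
	 words[0] = (elt1 << 16) | elt0.  */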
23450 for (i = 0; i < n_words; ++i)
23451 {
23452 rtx word = NULL_RTX;
23453
23454 for (j = 0; j < n_elt_per_word; ++j)
23455 {
23456 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23457 elt = convert_modes (word_mode, inner_mode, elt, true);
23458
23459 if (j == 0)
23460 word = elt;
23461 else
23462 {
23463 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23464 word, 1, OPTAB_LIB_WIDEN);
23465 word = expand_simple_binop (word_mode, IOR, word, elt,
23466 word, 1, OPTAB_LIB_WIDEN);
23467 }
23468 }
23469
23470 words[i] = word;
23471 }
23472
23473 if (n_words == 1)
23474 emit_move_insn (target, gen_lowpart (mode, words[0]));
23475 else if (n_words == 2)
23476 {
23477 rtx tmp = gen_reg_rtx (mode);
23478 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23479 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23480 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23481 emit_move_insn (target, tmp);
23482 }
23483 else if (n_words == 4)
23484 {
23485 rtx tmp = gen_reg_rtx (V4SImode);
23486 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23487 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23488 emit_move_insn (target, gen_lowpart (mode, tmp));
23489 }
23490 else
23491 gcc_unreachable ();
23492 }
23493 }
23494
23495 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23496 instructions unless MMX_OK is true. */
23497
23498 void
23499 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23500 {
23501 enum machine_mode mode = GET_MODE (target);
23502 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23503 int n_elts = GET_MODE_NUNITS (mode);
23504 int n_var = 0, one_var = -1;
23505 bool all_same = true, all_const_zero = true;
23506 int i;
23507 rtx x;
23508
23509 for (i = 0; i < n_elts; ++i)
23510 {
23511 x = XVECEXP (vals, 0, i);
23512 if (!CONSTANT_P (x))
23513 n_var++, one_var = i;
23514 else if (x != CONST0_RTX (inner_mode))
23515 all_const_zero = false;
23516 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23517 all_same = false;
23518 }
23519
23520 /* Constants are best loaded from the constant pool. */
23521 if (n_var == 0)
23522 {
23523 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23524 return;
23525 }
23526
23527 /* If all values are identical, broadcast the value. */
23528 if (all_same
23529 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23530 XVECEXP (vals, 0, 0)))
23531 return;
23532
23533 /* Values where only one field is non-constant are best loaded from
23534 the pool and overwritten via move later. */
23535 if (n_var == 1)
23536 {
23537 if (all_const_zero
23538 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23539 XVECEXP (vals, 0, one_var),
23540 one_var))
23541 return;
23542
23543 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23544 return;
23545 }
23546
23547 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23548 }
23549
23550 void
23551 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23552 {
23553 enum machine_mode mode = GET_MODE (target);
23554 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23555 bool use_vec_merge = false;
23556 rtx tmp;
23557
23558 switch (mode)
23559 {
23560 case V2SFmode:
23561 case V2SImode:
23562 if (mmx_ok)
23563 {
23564 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23565 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23566 if (elt == 0)
23567 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23568 else
23569 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23570 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23571 return;
23572 }
23573 break;
23574
23575 case V2DImode:
23576 use_vec_merge = TARGET_SSE4_1;
23577 if (use_vec_merge)
23578 break;
23579
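      /* FALLTHRU */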
23580 case V2DFmode:
23581 {
23582 rtx op0, op1;
23583
23584 /* For the two element vectors, we implement a VEC_CONCAT with
23585 the extraction of the other element. */
23586
23587 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23588 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23589
23590 if (elt == 0)
23591 op0 = val, op1 = tmp;
23592 else
23593 op0 = tmp, op1 = val;
23594
23595 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23596 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23597 }
23598 return;
23599
23600 case V4SFmode:
23601 use_vec_merge = TARGET_SSE4_1;
23602 if (use_vec_merge)
23603 break;
23604
23605 switch (elt)
23606 {
23607 case 0:
23608 use_vec_merge = true;
23609 break;
23610
23611 case 1:
23612 /* tmp = target = A B C D */
23613 tmp = copy_to_reg (target);
23614 /* target = A A B B */
23615 emit_insn (gen_sse_unpcklps (target, target, target));
23616 /* target = X A B B */
23617 ix86_expand_vector_set (false, target, val, 0);
23618 /* target = A X C D */
23619 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23620 GEN_INT (1), GEN_INT (0),
23621 GEN_INT (2+4), GEN_INT (3+4)));
23622 return;
23623
23624 case 2:
23625 /* tmp = target = A B C D */
23626 tmp = copy_to_reg (target);
23627 /* tmp = X B C D */
23628 ix86_expand_vector_set (false, tmp, val, 0);
23629 /* target = A B X D */
23630 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23631 GEN_INT (0), GEN_INT (1),
23632 GEN_INT (0+4), GEN_INT (3+4)));
23633 return;
23634
23635 case 3:
23636 /* tmp = target = A B C D */
23637 tmp = copy_to_reg (target);
23638 /* tmp = X B C D */
23639 ix86_expand_vector_set (false, tmp, val, 0);
23642 	  /* target = A B C X */
23641 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23642 GEN_INT (0), GEN_INT (1),
23643 GEN_INT (2+4), GEN_INT (0+4)));
23644 return;
23645
23646 default:
23647 gcc_unreachable ();
23648 }
23649 break;
23650
23651 case V4SImode:
23652 use_vec_merge = TARGET_SSE4_1;
23653 if (use_vec_merge)
23654 break;
23655
23656 /* Element 0 handled by vec_merge below. */
23657 if (elt == 0)
23658 {
23659 use_vec_merge = true;
23660 break;
23661 }
23662
23663 if (TARGET_SSE2)
23664 {
23665 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23666 store into element 0, then shuffle them back. */
23667
23668 rtx order[4];
23669
23670 order[0] = GEN_INT (elt);
23671 order[1] = const1_rtx;
23672 order[2] = const2_rtx;
23673 order[3] = GEN_INT (3);
23674 order[elt] = const0_rtx;
23675
23676 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23677 order[1], order[2], order[3]));
23678
23679 ix86_expand_vector_set (false, target, val, 0);
23680
23681 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23682 order[1], order[2], order[3]));
23683 }
23684 else
23685 {
23686 /* For SSE1, we have to reuse the V4SF code. */
23687 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23688 gen_lowpart (SFmode, val), elt);
23689 }
23690 return;
23691
23692 case V8HImode:
23693 use_vec_merge = TARGET_SSE2;
23694 break;
23695 case V4HImode:
23696 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23697 break;
23698
23699 case V16QImode:
23700 use_vec_merge = TARGET_SSE4_1;
23701 break;
23702
23703 case V8QImode:
23704 default:
23705 break;
23706 }
23707
23708 if (use_vec_merge)
23709 {
23710 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23711 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23712 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23713 }
23714 else
23715 {
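      /* No single-element set pattern is available; go through memory:
	 spill the vector to a stack slot, store VAL into the selected
	 element, and reload the whole vector.  */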
23716 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23717
23718 emit_move_insn (mem, target);
23719
23720 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23721 emit_move_insn (tmp, val);
23722
23723 emit_move_insn (target, mem);
23724 }
23725 }
23726
23727 void
23728 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23729 {
23730 enum machine_mode mode = GET_MODE (vec);
23731 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23732 bool use_vec_extr = false;
23733 rtx tmp;
23734
23735 switch (mode)
23736 {
23737 case V2SImode:
23738 case V2SFmode:
23739 if (!mmx_ok)
23740 break;
23741 /* FALLTHRU */
23742
23743 case V2DFmode:
23744 case V2DImode:
23745 use_vec_extr = true;
23746 break;
23747
23748 case V4SFmode:
23749 use_vec_extr = TARGET_SSE4_1;
23750 if (use_vec_extr)
23751 break;
23752
23753 switch (elt)
23754 {
23755 case 0:
23756 tmp = vec;
23757 break;
23758
23759 case 1:
23760 case 3:
23761 tmp = gen_reg_rtx (mode);
23762 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23763 GEN_INT (elt), GEN_INT (elt),
23764 GEN_INT (elt+4), GEN_INT (elt+4)));
23765 break;
23766
23767 case 2:
23768 tmp = gen_reg_rtx (mode);
23769 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23770 break;
23771
23772 default:
23773 gcc_unreachable ();
23774 }
23775 vec = tmp;
23776 use_vec_extr = true;
23777 elt = 0;
23778 break;
23779
23780 case V4SImode:
23781 use_vec_extr = TARGET_SSE4_1;
23782 if (use_vec_extr)
23783 break;
23784
23785 if (TARGET_SSE2)
23786 {
23787 switch (elt)
23788 {
23789 case 0:
23790 tmp = vec;
23791 break;
23792
23793 case 1:
23794 case 3:
23795 tmp = gen_reg_rtx (mode);
23796 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23797 GEN_INT (elt), GEN_INT (elt),
23798 GEN_INT (elt), GEN_INT (elt)));
23799 break;
23800
23801 case 2:
23802 tmp = gen_reg_rtx (mode);
23803 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23804 break;
23805
23806 default:
23807 gcc_unreachable ();
23808 }
23809 vec = tmp;
23810 use_vec_extr = true;
23811 elt = 0;
23812 }
23813 else
23814 {
23815 /* For SSE1, we have to reuse the V4SF code. */
23816 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23817 gen_lowpart (V4SFmode, vec), elt);
23818 return;
23819 }
23820 break;
23821
23822 case V8HImode:
23823 use_vec_extr = TARGET_SSE2;
23824 break;
23825 case V4HImode:
23826 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23827 break;
23828
23829 case V16QImode:
23830 use_vec_extr = TARGET_SSE4_1;
23831 break;
23832
23833 case V8QImode:
23834 /* ??? Could extract the appropriate HImode element and shift. */
23835 default:
23836 break;
23837 }
23838
23839 if (use_vec_extr)
23840 {
23841 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23842 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23843
23844 /* Let the rtl optimizers know about the zero extension performed. */
23845 if (inner_mode == QImode || inner_mode == HImode)
23846 {
23847 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23848 target = gen_lowpart (SImode, target);
23849 }
23850
23851 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23852 }
23853 else
23854 {
23855 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23856
23857 emit_move_insn (mem, vec);
23858
23859 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23860 emit_move_insn (target, tmp);
23861 }
23862 }
23863
23864 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23865 pattern to reduce; DEST is the destination; IN is the input vector. */
23866
23867 void
23868 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23869 {
23870 rtx tmp1, tmp2, tmp3;
23871
23872 tmp1 = gen_reg_rtx (V4SFmode);
23873 tmp2 = gen_reg_rtx (V4SFmode);
23874 tmp3 = gen_reg_rtx (V4SFmode);
23875
23876 emit_insn (gen_sse_movhlps (tmp1, in, in));
23877 emit_insn (fn (tmp2, tmp1, in));
23878
23879 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23880 GEN_INT (1), GEN_INT (1),
23881 GEN_INT (1+4), GEN_INT (1+4)));
23882 emit_insn (fn (dest, tmp2, tmp3));
23883 }
23884 \f
23885 /* Target hook for scalar_mode_supported_p. */
23886 static bool
23887 ix86_scalar_mode_supported_p (enum machine_mode mode)
23888 {
23889 if (DECIMAL_FLOAT_MODE_P (mode))
23890 return true;
23891 else if (mode == TFmode)
23892 return TARGET_64BIT;
23893 else
23894 return default_scalar_mode_supported_p (mode);
23895 }
23896
23897 /* Implements target hook vector_mode_supported_p. */
23898 static bool
23899 ix86_vector_mode_supported_p (enum machine_mode mode)
23900 {
23901 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23902 return true;
23903 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23904 return true;
23905 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
23906 return true;
23907 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
23908 return true;
23909 return false;
23910 }
23911
23912 /* Target hook for c_mode_for_suffix. */
23913 static enum machine_mode
23914 ix86_c_mode_for_suffix (char suffix)
23915 {
23916 if (TARGET_64BIT && suffix == 'q')
23917 return TFmode;
23918 if (TARGET_MMX && suffix == 'w')
23919 return XFmode;
23920
23921 return VOIDmode;
23922 }
23923
23924 /* Worker function for TARGET_MD_ASM_CLOBBERS.
23925
23926 We do this in the new i386 backend to maintain source compatibility
23927 with the old cc0-based compiler. */
23928
23929 static tree
23930 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
23931 tree inputs ATTRIBUTE_UNUSED,
23932 tree clobbers)
23933 {
23934 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
23935 clobbers);
23936 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
23937 clobbers);
23938 return clobbers;
23939 }
23940
23941 /* Implements the target hook targetm.asm.encode_section_info.  This
23942 is not used by NetWare. */
23943
23944 static void ATTRIBUTE_UNUSED
23945 ix86_encode_section_info (tree decl, rtx rtl, int first)
23946 {
23947 default_encode_section_info (decl, rtl, first);
23948
23949 if (TREE_CODE (decl) == VAR_DECL
23950 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
23951 && ix86_in_large_data_p (decl))
23952 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
23953 }
23954
23955 /* Worker function for REVERSE_CONDITION. */
23956
23957 enum rtx_code
23958 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
23959 {
23960 return (mode != CCFPmode && mode != CCFPUmode
23961 ? reverse_condition (code)
23962 : reverse_condition_maybe_unordered (code));
23963 }
23964
23965 /* Output code to perform an x87 FP register move, from OPERANDS[1]
23966 to OPERANDS[0]. */
23967
23968 const char *
23969 output_387_reg_move (rtx insn, rtx *operands)
23970 {
23971 if (REG_P (operands[0]))
23972 {
23973 if (REG_P (operands[1])
23974 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23975 {
23976 if (REGNO (operands[0]) == FIRST_STACK_REG)
23977 return output_387_ffreep (operands, 0);
23978 return "fstp\t%y0";
23979 }
23980 if (STACK_TOP_P (operands[0]))
23981 return "fld%z1\t%y1";
23982 return "fst\t%y0";
23983 }
23984 else if (MEM_P (operands[0]))
23985 {
23986 gcc_assert (REG_P (operands[1]));
23987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23988 return "fstp%z0\t%y0";
23989 else
23990 {
23991 /* There is no non-popping store to memory for XFmode.
23992 So if we need one, follow the store with a load. */
23993 if (GET_MODE (operands[0]) == XFmode)
23994 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
23995 else
23996 return "fst%z0\t%y0";
23997 }
23998 }
23999 else
24000 gcc_unreachable ();
24001 }
24002
24003 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24004 FP status register is set. */
24005
24006 void
24007 ix86_emit_fp_unordered_jump (rtx label)
24008 {
24009 rtx reg = gen_reg_rtx (HImode);
24010 rtx temp;
24011
24012 emit_insn (gen_x86_fnstsw_1 (reg));
24013
24014 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24015 {
24016 emit_insn (gen_x86_sahf_1 (reg));
24017
24018 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24019 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24020 }
24021 else
24022 {
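      /* The C2 flag is bit 10 of the FP status word, i.e. bit 2 of the
	 high byte tested here.  */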
24023 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24024
24025 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24026 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24027 }
24028
24029 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24030 gen_rtx_LABEL_REF (VOIDmode, label),
24031 pc_rtx);
24032 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24033
24034 emit_jump_insn (temp);
24035 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24036 }
24037
24038 /* Output code to perform a log1p XFmode calculation. */
24039
24040 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24041 {
24042 rtx label1 = gen_label_rtx ();
24043 rtx label2 = gen_label_rtx ();
24044
24045 rtx tmp = gen_reg_rtx (XFmode);
24046 rtx tmp2 = gen_reg_rtx (XFmode);
24047
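  /* fyl2xp1 computes y * log2 (x + 1) but is only specified for
     |x| < 1 - sqrt (2) / 2; the constant below is that bound
     (approximately 0.2928932).  For larger |op1| we fall back to
     fyl2x on 1.0 + op1.  */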
24048 emit_insn (gen_absxf2 (tmp, op1));
24049 emit_insn (gen_cmpxf (tmp,
24050 CONST_DOUBLE_FROM_REAL_VALUE (
24051 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24052 XFmode)));
24053 emit_jump_insn (gen_bge (label1));
24054
24055 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24056 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24057 emit_jump (label2);
24058
24059 emit_label (label1);
24060 emit_move_insn (tmp, CONST1_RTX (XFmode));
24061 emit_insn (gen_addxf3 (tmp, op1, tmp));
24062 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24063 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24064
24065 emit_label (label2);
24066 }
24067
24068 /* Output code to perform a Newton-Raphson approximation of a single precision
24069 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24070
24071 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24072 {
24073 rtx x0, x1, e0, e1, two;
24074
24075 x0 = gen_reg_rtx (mode);
24076 e0 = gen_reg_rtx (mode);
24077 e1 = gen_reg_rtx (mode);
24078 x1 = gen_reg_rtx (mode);
24079
24080 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24081
24082 if (VECTOR_MODE_P (mode))
24083 two = ix86_build_const_vector (SFmode, true, two);
24084
24085 two = force_reg (mode, two);
24086
24087 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
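  /* x1 = x0 * (2.0 - b * x0) is one Newton-Raphson step for
     f (x) = 1/x - b, which roughly doubles the number of correct bits
     of the rcpss estimate x0 (about 12 bits), giving nearly full
     single-precision accuracy.  */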
24088
24089 /* x0 = 1./b estimate */
24090 emit_insn (gen_rtx_SET (VOIDmode, x0,
24091 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24092 UNSPEC_RCP)));
24093 /* e0 = x0 * b */
24094 emit_insn (gen_rtx_SET (VOIDmode, e0,
24095 gen_rtx_MULT (mode, x0, b)));
24096 /* e1 = 2. - e0 */
24097 emit_insn (gen_rtx_SET (VOIDmode, e1,
24098 gen_rtx_MINUS (mode, two, e0)));
24099 /* x1 = x0 * e1 */
24100 emit_insn (gen_rtx_SET (VOIDmode, x1,
24101 gen_rtx_MULT (mode, x0, e1)));
24102 /* res = a * x1 */
24103 emit_insn (gen_rtx_SET (VOIDmode, res,
24104 gen_rtx_MULT (mode, a, x1)));
24105 }
24106
24107 /* Output code to perform a Newton-Raphson approximation of a
24108 single precision floating point [reciprocal] square root. */
24109
24110 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24111 bool recip)
24112 {
24113 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
24114
24115 x0 = gen_reg_rtx (mode);
24116 e0 = gen_reg_rtx (mode);
24117 e1 = gen_reg_rtx (mode);
24118 e2 = gen_reg_rtx (mode);
24119 e3 = gen_reg_rtx (mode);
24120
24121 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
24122 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
24123
24124 mask = gen_reg_rtx (mode);
24125
24126 if (VECTOR_MODE_P (mode))
24127 {
24128 three = ix86_build_const_vector (SFmode, true, three);
24129 half = ix86_build_const_vector (SFmode, true, half);
24130 }
24131
24132 three = force_reg (mode, three);
24133 half = force_reg (mode, half);
24134
24135 zero = force_reg (mode, CONST0_RTX(mode));
24136
24137 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24138 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
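  /* x1 = x0 * (3.0 - a * x0 * x0) / 2.0 is one Newton-Raphson step for
     f (x) = 1/(x*x) - a, refining the rsqrtss estimate x0; multiplying
     the result by a instead gives sqrt (a).  The comparison mask built
     below zeroes x0 when a == 0.0, so the a * inf term cannot produce a
     NaN for sqrt (0.0).  */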
24139
24140 /* Compare a to zero. */
24141 emit_insn (gen_rtx_SET (VOIDmode, mask,
24142 gen_rtx_NE (mode, a, zero)));
24143
24144 /* x0 = 1./sqrt(a) estimate */
24145 emit_insn (gen_rtx_SET (VOIDmode, x0,
24146 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24147 UNSPEC_RSQRT)));
24148 /* Filter out infinity. */
24149 if (VECTOR_MODE_P (mode))
24150 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
24151 gen_rtx_AND (mode,
24152 gen_lowpart (V4SFmode, x0),
24153 gen_lowpart (V4SFmode, mask))));
24154 else
24155 emit_insn (gen_rtx_SET (VOIDmode, x0,
24156 gen_rtx_AND (mode, x0, mask)));
24157
24158 /* e0 = x0 * a */
24159 emit_insn (gen_rtx_SET (VOIDmode, e0,
24160 gen_rtx_MULT (mode, x0, a)));
24161 /* e1 = e0 * x0 */
24162 emit_insn (gen_rtx_SET (VOIDmode, e1,
24163 gen_rtx_MULT (mode, e0, x0)));
24164 /* e2 = 3. - e1 */
24165 emit_insn (gen_rtx_SET (VOIDmode, e2,
24166 gen_rtx_MINUS (mode, three, e1)));
24167 if (recip)
24168 /* e3 = .5 * x0 */
24169 emit_insn (gen_rtx_SET (VOIDmode, e3,
24170 gen_rtx_MULT (mode, half, x0)));
24171 else
24172 /* e3 = .5 * e0 */
24173 emit_insn (gen_rtx_SET (VOIDmode, e3,
24174 gen_rtx_MULT (mode, half, e0)));
24175 /* ret = e2 * e3 */
24176 emit_insn (gen_rtx_SET (VOIDmode, res,
24177 gen_rtx_MULT (mode, e2, e3)));
24178 }
24179
24180 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24181
24182 static void ATTRIBUTE_UNUSED
24183 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24184 tree decl)
24185 {
24186 /* With Binutils 2.15, the "@unwind" marker must be specified on
24187 every occurrence of the ".eh_frame" section, not just the first
24188 one. */
24189 if (TARGET_64BIT
24190 && strcmp (name, ".eh_frame") == 0)
24191 {
24192 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24193 flags & SECTION_WRITE ? "aw" : "a");
24194 return;
24195 }
24196 default_elf_asm_named_section (name, flags, decl);
24197 }
24198
24199 /* Return the mangling of TYPE if it is an extended fundamental type. */
24200
24201 static const char *
24202 ix86_mangle_type (const_tree type)
24203 {
24204 type = TYPE_MAIN_VARIANT (type);
24205
24206 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24207 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24208 return NULL;
24209
24210 switch (TYPE_MODE (type))
24211 {
24212 case TFmode:
24213 /* __float128 is "g". */
24214 return "g";
24215 case XFmode:
24216 /* "long double" or __float80 is "e". */
24217 return "e";
24218 default:
24219 return NULL;
24220 }
24221 }
24222
24223 /* For 32-bit code we can save PIC register setup by using
24224 __stack_chk_fail_local hidden function instead of calling
24225 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
24226 register, so it is better to call __stack_chk_fail directly. */
24227
24228 static tree
24229 ix86_stack_protect_fail (void)
24230 {
24231 return TARGET_64BIT
24232 ? default_external_stack_protect_fail ()
24233 : default_hidden_stack_protect_fail ();
24234 }
24235
24236 /* Select a format to encode pointers in exception handling data. CODE
24237 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24238 true if the symbol may be affected by dynamic relocations.
24239
24240 ??? All x86 object file formats are capable of representing this.
24241 After all, the relocation needed is the same as for the call insn.
24242 Whether or not a particular assembler allows us to enter such, I
24243 guess we'll have to see. */
24244 int
24245 asm_preferred_eh_data_format (int code, int global)
24246 {
24247 if (flag_pic)
24248 {
24249 int type = DW_EH_PE_sdata8;
24250 if (!TARGET_64BIT
24251 || ix86_cmodel == CM_SMALL_PIC
24252 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24253 type = DW_EH_PE_sdata4;
24254 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24255 }
24256 if (ix86_cmodel == CM_SMALL
24257 || (ix86_cmodel == CM_MEDIUM && code))
24258 return DW_EH_PE_udata4;
24259 return DW_EH_PE_absptr;
24260 }
24261 \f
24262 /* Expand copysign from SIGN to the positive value ABS_VALUE
24263 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24264 the sign-bit. */
24265 static void
24266 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24267 {
24268 enum machine_mode mode = GET_MODE (sign);
24269 rtx sgn = gen_reg_rtx (mode);
24270 if (mask == NULL_RTX)
24271 {
24272 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24273 if (!VECTOR_MODE_P (mode))
24274 {
24275 /* We need to generate a scalar mode mask in this case. */
24276 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24277 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24278 mask = gen_reg_rtx (mode);
24279 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24280 }
24281 }
24282 else
24283 mask = gen_rtx_NOT (mode, mask);
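  /* At this point MASK selects only the sign bit, so the two insns below
     compute result = abs_value | (sign & signbit), copying just the sign
     of SIGN onto ABS_VALUE (whose own sign bit is assumed to be clear).  */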
24284 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24285 gen_rtx_AND (mode, mask, sign)));
24286 emit_insn (gen_rtx_SET (VOIDmode, result,
24287 gen_rtx_IOR (mode, abs_value, sgn)));
24288 }
24289
24290 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24291 mask for masking out the sign-bit is stored in *SMASK, if that is
24292 non-null. */
24293 static rtx
24294 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24295 {
24296 enum machine_mode mode = GET_MODE (op0);
24297 rtx xa, mask;
24298
24299 xa = gen_reg_rtx (mode);
24300 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24301 if (!VECTOR_MODE_P (mode))
24302 {
24303 /* We need to generate a scalar mode mask in this case. */
24304 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24305 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24306 mask = gen_reg_rtx (mode);
24307 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24308 }
24309 emit_insn (gen_rtx_SET (VOIDmode, xa,
24310 gen_rtx_AND (mode, op0, mask)));
24311
24312 if (smask)
24313 *smask = mask;
24314
24315 return xa;
24316 }
24317
24318 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24319 swapping the operands if SWAP_OPERANDS is true. The expanded
24320 code is a forward jump to a newly created label in case the
24321 comparison is true. The generated label rtx is returned. */
24322 static rtx
24323 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24324 bool swap_operands)
24325 {
24326 rtx label, tmp;
24327
24328 if (swap_operands)
24329 {
24330 tmp = op0;
24331 op0 = op1;
24332 op1 = tmp;
24333 }
24334
24335 label = gen_label_rtx ();
24336 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24337 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24338 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24339 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24340 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24341 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24342 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24343 JUMP_LABEL (tmp) = label;
24344
24345 return label;
24346 }
24347
24348 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24349 using comparison code CODE. Operands are swapped for the comparison if
24350 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24351 static rtx
24352 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24353 bool swap_operands)
24354 {
24355 enum machine_mode mode = GET_MODE (op0);
24356 rtx mask = gen_reg_rtx (mode);
24357
24358 if (swap_operands)
24359 {
24360 rtx tmp = op0;
24361 op0 = op1;
24362 op1 = tmp;
24363 }
24364
24365 if (mode == DFmode)
24366 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24367 gen_rtx_fmt_ee (code, mode, op0, op1)));
24368 else
24369 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24370 gen_rtx_fmt_ee (code, mode, op0, op1)));
24371
24372 return mask;
24373 }
24374
24375 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24376 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24377 static rtx
24378 ix86_gen_TWO52 (enum machine_mode mode)
24379 {
24380 REAL_VALUE_TYPE TWO52r;
24381 rtx TWO52;
24382
24383 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24384 TWO52 = const_double_from_real_value (TWO52r, mode);
24385 TWO52 = force_reg (mode, TWO52);
24386
24387 return TWO52;
24388 }
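
/* Note for the expanders below: for values with magnitude below 2**52
   (DFmode) or 2**23 (SFmode), adding and then subtracting TWO52 pushes the
   fraction bits out of the mantissa, leaving the value rounded to an
   integer in the current rounding mode (round to nearest by default).
   Values at least TWO52 in magnitude are already integral and are handled
   by the early-exit compare against TWO52.  */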
24389
24390 /* Expand SSE sequence for computing lround from OP1 storing
24391 into OP0. */
24392 void
24393 ix86_expand_lround (rtx op0, rtx op1)
24394 {
24395 /* C code for the stuff we're doing below:
24396 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24397 return (long)tmp;
24398 */
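  /* nextafter (0.5, 0.0), i.e. the largest representable value below 0.5,
     is used instead of 0.5 itself so that inputs just under 0.5 (e.g.
     0.49999999999999994 in DFmode, whose sum with an exact 0.5 would round
     up to 1.0) are not rounded to the wrong integer.  */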
24399 enum machine_mode mode = GET_MODE (op1);
24400 const struct real_format *fmt;
24401 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24402 rtx adj;
24403
24404 /* load nextafter (0.5, 0.0) */
24405 fmt = REAL_MODE_FORMAT (mode);
24406 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24407 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24408
24409 /* adj = copysign (0.5, op1) */
24410 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24411 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24412
24413 /* adj = op1 + adj */
24414 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24415
24416 /* op0 = (imode)adj */
24417 expand_fix (op0, adj, 0);
24418 }
24419
24420 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
24421 into OPERAND0. */
24422 void
24423 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24424 {
24425 /* C code for the stuff we're doing below (for do_floor):
24426 xi = (long)op1;
24427 xi -= (double)xi > op1 ? 1 : 0;
24428 return xi;
24429 */
24430 enum machine_mode fmode = GET_MODE (op1);
24431 enum machine_mode imode = GET_MODE (op0);
24432 rtx ireg, freg, label, tmp;
24433
24434 /* reg = (long)op1 */
24435 ireg = gen_reg_rtx (imode);
24436 expand_fix (ireg, op1, 0);
24437
24438 /* freg = (double)reg */
24439 freg = gen_reg_rtx (fmode);
24440 expand_float (freg, ireg, 0);
24441
24442 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24443 label = ix86_expand_sse_compare_and_jump (UNLE,
24444 freg, op1, !do_floor);
24445 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24446 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24447 emit_move_insn (ireg, tmp);
24448
24449 emit_label (label);
24450 LABEL_NUSES (label) = 1;
24451
24452 emit_move_insn (op0, ireg);
24453 }
24454
24455 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24456 result in OPERAND0. */
24457 void
24458 ix86_expand_rint (rtx operand0, rtx operand1)
24459 {
24460 /* C code for the stuff we're doing below:
24461 xa = fabs (operand1);
24462 if (!isless (xa, 2**52))
24463 return operand1;
24464 xa = xa + 2**52 - 2**52;
24465 return copysign (xa, operand1);
24466 */
24467 enum machine_mode mode = GET_MODE (operand0);
24468 rtx res, xa, label, TWO52, mask;
24469
24470 res = gen_reg_rtx (mode);
24471 emit_move_insn (res, operand1);
24472
24473 /* xa = abs (operand1) */
24474 xa = ix86_expand_sse_fabs (res, &mask);
24475
24476 /* if (!isless (xa, TWO52)) goto label; */
24477 TWO52 = ix86_gen_TWO52 (mode);
24478 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24479
24480 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24481 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24482
24483 ix86_sse_copysign_to_positive (res, xa, res, mask);
24484
24485 emit_label (label);
24486 LABEL_NUSES (label) = 1;
24487
24488 emit_move_insn (operand0, res);
24489 }
24490
24491 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24492 into OPERAND0. */
24493 void
24494 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24495 {
24496 /* C code for the stuff we expand below.
24497 double xa = fabs (x), x2;
24498 if (!isless (xa, TWO52))
24499 return x;
24500 xa = xa + TWO52 - TWO52;
24501 x2 = copysign (xa, x);
24502 Compensate. Floor:
24503 if (x2 > x)
24504 x2 -= 1;
24505 Compensate. Ceil:
24506 if (x2 < x)
24507 x2 -= -1;
24508 return x2;
24509 */
24510 enum machine_mode mode = GET_MODE (operand0);
24511 rtx xa, TWO52, tmp, label, one, res, mask;
24512
24513 TWO52 = ix86_gen_TWO52 (mode);
24514
24515 /* Temporary for holding the result, initialized to the input
24516 operand to ease control flow. */
24517 res = gen_reg_rtx (mode);
24518 emit_move_insn (res, operand1);
24519
24520 /* xa = abs (operand1) */
24521 xa = ix86_expand_sse_fabs (res, &mask);
24522
24523 /* if (!isless (xa, TWO52)) goto label; */
24524 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24525
24526 /* xa = xa + TWO52 - TWO52; */
24527 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24528 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24529
24530 /* xa = copysign (xa, operand1) */
24531 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24532
24533 /* generate 1.0 or -1.0 */
24534 one = force_reg (mode,
24535 const_double_from_real_value (do_floor
24536 ? dconst1 : dconstm1, mode));
24537
24538 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
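  /* The compare mask is all-ones where the comparison holds and all-zeros
     elsewhere, so ANDing it with 1.0 (or -1.0 for the ceil case) produces
     exactly the amount to subtract.  */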
24539 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24540 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24541 gen_rtx_AND (mode, one, tmp)));
24542 /* We always need to subtract here to preserve signed zero. */
24543 tmp = expand_simple_binop (mode, MINUS,
24544 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24545 emit_move_insn (res, tmp);
24546
24547 emit_label (label);
24548 LABEL_NUSES (label) = 1;
24549
24550 emit_move_insn (operand0, res);
24551 }
24552
24553 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24554 into OPERAND0. */
24555 void
24556 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24557 {
24558 /* C code for the stuff we expand below.
24559 double xa = fabs (x), x2;
24560 if (!isless (xa, TWO52))
24561 return x;
24562 x2 = (double)(long)x;
24563 Compensate. Floor:
24564 if (x2 > x)
24565 x2 -= 1;
24566 Compensate. Ceil:
24567 if (x2 < x)
24568 x2 += 1;
24569 if (HONOR_SIGNED_ZEROS (mode))
24570 return copysign (x2, x);
24571 return x2;
24572 */
24573 enum machine_mode mode = GET_MODE (operand0);
24574 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24575
24576 TWO52 = ix86_gen_TWO52 (mode);
24577
24578 /* Temporary for holding the result, initialized to the input
24579 operand to ease control flow. */
24580 res = gen_reg_rtx (mode);
24581 emit_move_insn (res, operand1);
24582
24583 /* xa = abs (operand1) */
24584 xa = ix86_expand_sse_fabs (res, &mask);
24585
24586 /* if (!isless (xa, TWO52)) goto label; */
24587 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24588
24589 /* xa = (double)(long)x */
24590 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24591 expand_fix (xi, res, 0);
24592 expand_float (xa, xi, 0);
24593
24594 /* generate 1.0 */
24595 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24596
24597 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24598 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24599 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24600 gen_rtx_AND (mode, one, tmp)));
24601 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24602 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24603 emit_move_insn (res, tmp);
24604
24605 if (HONOR_SIGNED_ZEROS (mode))
24606 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24607
24608 emit_label (label);
24609 LABEL_NUSES (label) = 1;
24610
24611 emit_move_insn (operand0, res);
24612 }
24613
24614 /* Expand SSE sequence for computing round from OPERAND1 storing
24615 into OPERAND0. Sequence that works without relying on DImode truncation
24616 via cvttsd2siq, which is only available on 64-bit targets. */
24617 void
24618 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24619 {
24620 /* C code for the stuff we expand below.
24621 double xa = fabs (x), xa2, x2;
24622 if (!isless (xa, TWO52))
24623 return x;
24624 Using the absolute value and copying back sign makes
24625 -0.0 -> -0.0 correct.
24626 xa2 = xa + TWO52 - TWO52;
24627 Compensate.
24628 dxa = xa2 - xa;
24629 if (dxa <= -0.5)
24630 xa2 += 1;
24631 else if (dxa > 0.5)
24632 xa2 -= 1;
24633 x2 = copysign (xa2, x);
24634 return x2;
24635 */
24636 enum machine_mode mode = GET_MODE (operand0);
24637 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24638
24639 TWO52 = ix86_gen_TWO52 (mode);
24640
24641 /* Temporary for holding the result, initialized to the input
24642 operand to ease control flow. */
24643 res = gen_reg_rtx (mode);
24644 emit_move_insn (res, operand1);
24645
24646 /* xa = abs (operand1) */
24647 xa = ix86_expand_sse_fabs (res, &mask);
24648
24649 /* if (!isless (xa, TWO52)) goto label; */
24650 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24651
24652 /* xa2 = xa + TWO52 - TWO52; */
24653 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24654 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24655
24656 /* dxa = xa2 - xa; */
24657 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24658
24659 /* generate 0.5, 1.0 and -0.5 */
24660 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24661 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24662 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24663 0, OPTAB_DIRECT);
24664
24665 /* Compensate. */
24666 tmp = gen_reg_rtx (mode);
24667 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24668 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24669 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24670 gen_rtx_AND (mode, one, tmp)));
24671 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24672 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24673 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24674 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24675 gen_rtx_AND (mode, one, tmp)));
24676 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24677
24678 /* res = copysign (xa2, operand1) */
24679 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24680
24681 emit_label (label);
24682 LABEL_NUSES (label) = 1;
24683
24684 emit_move_insn (operand0, res);
24685 }
24686
24687 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24688 into OPERAND0. */
24689 void
24690 ix86_expand_trunc (rtx operand0, rtx operand1)
24691 {
24692 /* C code for SSE variant we expand below.
24693 double xa = fabs (x), x2;
24694 if (!isless (xa, TWO52))
24695 return x;
24696 x2 = (double)(long)x;
24697 if (HONOR_SIGNED_ZEROS (mode))
24698 return copysign (x2, x);
24699 return x2;
24700 */
24701 enum machine_mode mode = GET_MODE (operand0);
24702 rtx xa, xi, TWO52, label, res, mask;
24703
24704 TWO52 = ix86_gen_TWO52 (mode);
24705
24706 /* Temporary for holding the result, initialized to the input
24707 operand to ease control flow. */
24708 res = gen_reg_rtx (mode);
24709 emit_move_insn (res, operand1);
24710
24711 /* xa = abs (operand1) */
24712 xa = ix86_expand_sse_fabs (res, &mask);
24713
24714 /* if (!isless (xa, TWO52)) goto label; */
24715 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24716
24717 /* x = (double)(long)x */
24718 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24719 expand_fix (xi, res, 0);
24720 expand_float (res, xi, 0);
24721
24722 if (HONOR_SIGNED_ZEROS (mode))
24723 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24724
24725 emit_label (label);
24726 LABEL_NUSES (label) = 1;
24727
24728 emit_move_insn (operand0, res);
24729 }
24730
24731 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24732 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
24733 void
24734 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24735 {
24736 enum machine_mode mode = GET_MODE (operand0);
24737 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24738
24739 /* C code for SSE variant we expand below.
24740 double xa = fabs (x), xa2, x2;
24741 if (!isless (xa, TWO52))
24742 return x;
24743 xa2 = xa + TWO52 - TWO52;
24744 Compensate:
24745 if (xa2 > xa)
24746 xa2 -= 1.0;
24747 x2 = copysign (xa2, x);
24748 return x2;
24749 */
24750
24751 TWO52 = ix86_gen_TWO52 (mode);
24752
24753 /* Temporary for holding the result, initialized to the input
24754 operand to ease control flow. */
24755 res = gen_reg_rtx (mode);
24756 emit_move_insn (res, operand1);
24757
24758 /* xa = abs (operand1) */
24759 xa = ix86_expand_sse_fabs (res, &smask);
24760
24761 /* if (!isless (xa, TWO52)) goto label; */
24762 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24763
24764 /* res = xa + TWO52 - TWO52; */
24765 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24766 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24767 emit_move_insn (res, tmp);
24768
24769 /* generate 1.0 */
24770 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24771
24772 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24773 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24774 emit_insn (gen_rtx_SET (VOIDmode, mask,
24775 gen_rtx_AND (mode, mask, one)));
24776 tmp = expand_simple_binop (mode, MINUS,
24777 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24778 emit_move_insn (res, tmp);
24779
24780 /* res = copysign (res, operand1) */
24781 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24782
24783 emit_label (label);
24784 LABEL_NUSES (label) = 1;
24785
24786 emit_move_insn (operand0, res);
24787 }
24788
24789 /* Expand SSE sequence for computing round from OPERAND1 storing
24790 into OPERAND0. */
24791 void
24792 ix86_expand_round (rtx operand0, rtx operand1)
24793 {
24794 /* C code for the stuff we're doing below:
24795 double xa = fabs (x);
24796 if (!isless (xa, TWO52))
24797 return x;
24798 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24799 return copysign (xa, x);
24800 */
24801 enum machine_mode mode = GET_MODE (operand0);
24802 rtx res, TWO52, xa, label, xi, half, mask;
24803 const struct real_format *fmt;
24804 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24805
24806 /* Temporary for holding the result, initialized to the input
24807 operand to ease control flow. */
24808 res = gen_reg_rtx (mode);
24809 emit_move_insn (res, operand1);
24810
24811 TWO52 = ix86_gen_TWO52 (mode);
24812 xa = ix86_expand_sse_fabs (res, &mask);
24813 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24814
24815 /* load nextafter (0.5, 0.0) */
24816 fmt = REAL_MODE_FORMAT (mode);
24817 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24818 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24819
24820 /* xa = xa + 0.5 */
24821 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24822 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24823
24824 /* xa = (double)(int64_t)xa */
24825 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24826 expand_fix (xi, xa, 0);
24827 expand_float (xa, xi, 0);
24828
24829 /* res = copysign (xa, operand1) */
24830 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24831
24832 emit_label (label);
24833 LABEL_NUSES (label) = 1;
24834
24835 emit_move_insn (operand0, res);
24836 }
24837
24838 \f
24839 /* Check whether an SSE5 instruction with the given operands is valid.
24840 OPERANDS is the array of operands.
24841 NUM is the number of operands.
24842 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24843 NUM_MEMORY is the maximum number of memory operands to accept. */
24844 bool ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24845 {
24846 int mem_mask;
24847 int mem_count;
24848 int i;
24849
24850 /* Count the number of memory arguments */
24851 mem_mask = 0;
24852 mem_count = 0;
24853 for (i = 0; i < num; i++)
24854 {
24855 enum machine_mode mode = GET_MODE (operands[i]);
24856 if (register_operand (operands[i], mode))
24857 ;
24858
24859 else if (memory_operand (operands[i], mode))
24860 {
24861 mem_mask |= (1 << i);
24862 mem_count++;
24863 }
24864
24865 else
24866 {
24867 rtx pattern = PATTERN (insn);
24868
24869 /* allow 0 for pcmov */
24870 if (GET_CODE (pattern) != SET
24871 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24872 || i < 2
24873 || operands[i] != CONST0_RTX (mode))
24874 return false;
24875 }
24876 }
24877
24878 /* If there were no memory operations, allow the insn */
24879 if (mem_mask == 0)
24880 return true;
24881
24882 /* Do not allow the destination register to be a memory operand. */
24883 else if (mem_mask & (1 << 0))
24884 return false;
24885
24886 /* If there are too many memory operations, disallow the instruction. While
24887 the hardware only allows 1 memory reference, before register allocation
24888 we sometimes allow two memory operations for some insns in order to allow
24889 code like the following to be optimized:
24890
24891 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24892
24893 or similar cases that are vectorized into using the fmaddss
24894 instruction. */
24895 else if (mem_count > num_memory)
24896 return false;
24897
24898 /* Don't allow more than one memory operation if not optimizing. */
24899 else if (mem_count > 1 && !optimize)
24900 return false;
24901
24902 else if (num == 4 && mem_count == 1)
24903 {
24904 /* formats (destination is the first argument), example fmaddss:
24905 xmm1, xmm1, xmm2, xmm3/mem
24906 xmm1, xmm1, xmm2/mem, xmm3
24907 xmm1, xmm2, xmm3/mem, xmm1
24908 xmm1, xmm2/mem, xmm3, xmm1 */
24909 if (uses_oc0)
24910 return ((mem_mask == (1 << 1))
24911 || (mem_mask == (1 << 2))
24912 || (mem_mask == (1 << 3)));
24913
24914 /* format, example pmacsdd:
24915 xmm1, xmm2, xmm3/mem, xmm1 */
24916 else
24917 return (mem_mask == (1 << 2));
24918 }
24919
24920 else if (num == 4 && num_memory == 2)
24921 {
24922 /* If there are two memory operations, we can load one of the memory ops
24923 into the destination register. This is for optimizing the
24924 multiply/add ops, for which the combiner has optimized both the multiply
24925 and the add insns to have a memory operation. We have to be careful
24926 that the destination doesn't overlap with the inputs. */
24927 rtx op0 = operands[0];
24928
24929 if (reg_mentioned_p (op0, operands[1])
24930 || reg_mentioned_p (op0, operands[2])
24931 || reg_mentioned_p (op0, operands[3]))
24932 return false;
24933
24934 /* formats (destination is the first argument), example fmaddss:
24935 xmm1, xmm1, xmm2, xmm3/mem
24936 xmm1, xmm1, xmm2/mem, xmm3
24937 xmm1, xmm2, xmm3/mem, xmm1
24938 xmm1, xmm2/mem, xmm3, xmm1
24939
24940 For the oc0 case, we will load either operands[1] or operands[3] into
24941 operands[0], so any combination of 2 memory operands is ok. */
24942 if (uses_oc0)
24943 return true;
24944
24945 /* format, example pmacsdd:
24946 xmm1, xmm2, xmm3/mem, xmm1
24947
24948 For the integer multiply/add instructions be more restrictive and
24949 require operands[2] and operands[3] to be the memory operands. */
24950 else
24951 return (mem_mask == ((1 << 2) | (1 << 3)));
24952 }
24953
24954 else if (num == 3 && num_memory == 1)
24955 {
24956 /* formats, example protb:
24957 xmm1, xmm2, xmm3/mem
24958 xmm1, xmm2/mem, xmm3 */
24959 if (uses_oc0)
24960 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
24961
24962 /* format, example comeq:
24963 xmm1, xmm2, xmm3/mem */
24964 else
24965 return (mem_mask == (1 << 2));
24966 }
24967
24968 else
24969 gcc_unreachable ();
24970
24971 return false;
24972 }
24973
24974 \f
24975 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
24976 hardware will allow by using the destination register to load one of the
24977 memory operations. Presently this is used by the multiply/add routines to
24978 allow 2 memory references. */
24979
24980 void
24981 ix86_expand_sse5_multiple_memory (rtx operands[],
24982 int num,
24983 enum machine_mode mode)
24984 {
24985 rtx op0 = operands[0];
24986 if (num != 4
24987 || memory_operand (op0, mode)
24988 || reg_mentioned_p (op0, operands[1])
24989 || reg_mentioned_p (op0, operands[2])
24990 || reg_mentioned_p (op0, operands[3]))
24991 gcc_unreachable ();
24992
24993 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
24994 the destination register. */
24995 if (memory_operand (operands[1], mode))
24996 {
24997 emit_move_insn (op0, operands[1]);
24998 operands[1] = op0;
24999 }
25000 else if (memory_operand (operands[3], mode))
25001 {
25002 emit_move_insn (op0, operands[3]);
25003 operands[3] = op0;
25004 }
25005 else
25006 gcc_unreachable ();
25007
25008 return;
25009 }
25010
25011 \f
25012 /* Table of valid machine attributes. */
25013 static const struct attribute_spec ix86_attribute_table[] =
25014 {
25015 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25016 /* Stdcall attribute says callee is responsible for popping arguments
25017 if they are not variable. */
25018 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25019 /* Fastcall attribute says callee is responsible for popping arguments
25020 if they are not variable. */
25021 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25022 /* Cdecl attribute says the callee is a normal C declaration */
25023 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25024 /* Regparm attribute specifies how many integer arguments are to be
25025 passed in registers. */
25026 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25027 /* Sseregparm attribute says we are using x86_64 calling conventions
25028 for FP arguments. */
25029 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25030 /* force_align_arg_pointer says this function realigns the stack at entry. */
25031 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25032 false, true, true, ix86_handle_cconv_attribute },
25033 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25034 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25035 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25036 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25037 #endif
25038 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25039 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25040 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25041 SUBTARGET_ATTRIBUTE_TABLE,
25042 #endif
25043 { NULL, 0, 0, false, false, false, NULL }
25044 };
25045
25046 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25047 static int
25048 x86_builtin_vectorization_cost (bool runtime_test)
25049 {
25050 /* If the branch of the runtime test is taken, i.e. the vectorized
25051 version is skipped, this incurs a misprediction cost (because the
25052 vectorized version is expected to be the fall-through). So we subtract
25053 the latency of a mispredicted branch from the costs that are incurred
25054 when the vectorized version is executed.
25055
25056 TODO: The values in individual target tables have to be tuned or new
25057 fields may be needed. E.g., on K8, the default branch path is the
25058 not-taken path. If the taken path is predicted correctly, the minimum
25059 penalty of going down the taken-path is 1 cycle. If the taken-path is
25060 not predicted correctly, then the minimum penalty is 10 cycles. */
25061
25062 if (runtime_test)
25063 {
25064 return (-(ix86_cost->cond_taken_branch_cost));
25065 }
25066 else
25067 return 0;
25068 }
25069
25070 /* Initialize the GCC target structure. */
25071 #undef TARGET_ATTRIBUTE_TABLE
25072 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25073 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25074 # undef TARGET_MERGE_DECL_ATTRIBUTES
25075 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25076 #endif
25077
25078 #undef TARGET_COMP_TYPE_ATTRIBUTES
25079 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25080
25081 #undef TARGET_INIT_BUILTINS
25082 #define TARGET_INIT_BUILTINS ix86_init_builtins
25083 #undef TARGET_EXPAND_BUILTIN
25084 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25085
25086 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25087 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25088 ix86_builtin_vectorized_function
25089
25090 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25091 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25092
25093 #undef TARGET_BUILTIN_RECIPROCAL
25094 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25095
25096 #undef TARGET_ASM_FUNCTION_EPILOGUE
25097 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25098
25099 #undef TARGET_ENCODE_SECTION_INFO
25100 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25101 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25102 #else
25103 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25104 #endif
25105
25106 #undef TARGET_ASM_OPEN_PAREN
25107 #define TARGET_ASM_OPEN_PAREN ""
25108 #undef TARGET_ASM_CLOSE_PAREN
25109 #define TARGET_ASM_CLOSE_PAREN ""
25110
25111 #undef TARGET_ASM_ALIGNED_HI_OP
25112 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25113 #undef TARGET_ASM_ALIGNED_SI_OP
25114 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25115 #ifdef ASM_QUAD
25116 #undef TARGET_ASM_ALIGNED_DI_OP
25117 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25118 #endif
25119
25120 #undef TARGET_ASM_UNALIGNED_HI_OP
25121 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25122 #undef TARGET_ASM_UNALIGNED_SI_OP
25123 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25124 #undef TARGET_ASM_UNALIGNED_DI_OP
25125 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25126
25127 #undef TARGET_SCHED_ADJUST_COST
25128 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25129 #undef TARGET_SCHED_ISSUE_RATE
25130 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25131 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25132 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25133 ia32_multipass_dfa_lookahead
25134
25135 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25136 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25137
25138 #ifdef HAVE_AS_TLS
25139 #undef TARGET_HAVE_TLS
25140 #define TARGET_HAVE_TLS true
25141 #endif
25142 #undef TARGET_CANNOT_FORCE_CONST_MEM
25143 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25144 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25145 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25146
25147 #undef TARGET_DELEGITIMIZE_ADDRESS
25148 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25149
25150 #undef TARGET_MS_BITFIELD_LAYOUT_P
25151 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25152
25153 #if TARGET_MACHO
25154 #undef TARGET_BINDS_LOCAL_P
25155 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25156 #endif
25157 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25158 #undef TARGET_BINDS_LOCAL_P
25159 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25160 #endif
25161
25162 #undef TARGET_ASM_OUTPUT_MI_THUNK
25163 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25164 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25165 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25166
25167 #undef TARGET_ASM_FILE_START
25168 #define TARGET_ASM_FILE_START x86_file_start
25169
25170 #undef TARGET_DEFAULT_TARGET_FLAGS
25171 #define TARGET_DEFAULT_TARGET_FLAGS \
25172 (TARGET_DEFAULT \
25173 | TARGET_SUBTARGET_DEFAULT \
25174 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25175
25176 #undef TARGET_HANDLE_OPTION
25177 #define TARGET_HANDLE_OPTION ix86_handle_option
25178
25179 #undef TARGET_RTX_COSTS
25180 #define TARGET_RTX_COSTS ix86_rtx_costs
25181 #undef TARGET_ADDRESS_COST
25182 #define TARGET_ADDRESS_COST ix86_address_cost
25183
25184 #undef TARGET_FIXED_CONDITION_CODE_REGS
25185 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25186 #undef TARGET_CC_MODES_COMPATIBLE
25187 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25188
25189 #undef TARGET_MACHINE_DEPENDENT_REORG
25190 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25191
25192 #undef TARGET_BUILD_BUILTIN_VA_LIST
25193 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25194
25195 #undef TARGET_MD_ASM_CLOBBERS
25196 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25197
25198 #undef TARGET_PROMOTE_PROTOTYPES
25199 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25200 #undef TARGET_STRUCT_VALUE_RTX
25201 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25202 #undef TARGET_SETUP_INCOMING_VARARGS
25203 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25204 #undef TARGET_MUST_PASS_IN_STACK
25205 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25206 #undef TARGET_PASS_BY_REFERENCE
25207 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25208 #undef TARGET_INTERNAL_ARG_POINTER
25209 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25210 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25211 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25212 #undef TARGET_STRICT_ARGUMENT_NAMING
25213 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25214
25215 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25216 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25217
25218 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25219 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25220
25221 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25222 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25223
25224 #undef TARGET_C_MODE_FOR_SUFFIX
25225 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25226
25227 #ifdef HAVE_AS_TLS
25228 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25229 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25230 #endif
25231
25232 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25233 #undef TARGET_INSERT_ATTRIBUTES
25234 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25235 #endif
25236
25237 #undef TARGET_MANGLE_TYPE
25238 #define TARGET_MANGLE_TYPE ix86_mangle_type
25239
25240 #undef TARGET_STACK_PROTECT_FAIL
25241 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25242
25243 #undef TARGET_FUNCTION_VALUE
25244 #define TARGET_FUNCTION_VALUE ix86_function_value
25245
25246 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25247 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25248
25249 struct gcc_target targetm = TARGET_INITIALIZER;
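/* A minimal sketch of how the block of macros above takes effect, under
   the usual target-hook conventions: target-def.h supplies a default
   definition for every TARGET_* hook macro, each #undef/#define pair above
   replaces one default with its i386 implementation, and TARGET_INITIALIZER
   then expands to an aggregate initializer built from the macros' current
   values.  Generic code reaches the backend only through the resulting
   TARGETM structure, roughly like:

     targetm.rtx_costs (x, code, outer_code, &total);

   which, given the definition above, ends up calling ix86_rtx_costs.  */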
25250 \f
25251 #include "gt-i386.h"