1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 static int x86_builtin_vectorization_cost (bool);
56
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
59 #endif
60
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
67 : 4)
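/* For example, MODE_INDEX (SImode) is 2, so the per-mode cost tables below
   (multiply and divide) are read as table[MODE_INDEX (mode)]; index 4 is the
   "other" entry used for any remaining mode.  */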
68
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
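/* Under those assumptions COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. a
   two-byte instruction is charged the same as a single add on the
   instruction-count scale, keeping byte and insn costs comparable.  */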
72
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
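/* Each memcpy/memset descriptor below gives a default algorithm for unknown
   block sizes followed by {maximum-size, algorithm} pairs, where a maximum of
   -1 covers all larger sizes.  Every cost table carries a pair of such
   descriptors (32-bit code first, 64-bit code second, judging from which slot
   the 32-bit-only CPUs leave as a dummy); DUMMY_STRINGOP_ALGS, which always
   falls back to a library call, fills a slot the table does not tune.  */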
74
75 static const
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
94 0, /* "large" insn */
95 2, /* MOVE_RATIO */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
121 2, /* Branch cost */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
143 };
144
145 /* Processor costs (relative to an add) */
146 static const
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
192 1, /* Branch cost */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
215
216 static const
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
236 3, /* MOVE_RATIO */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
264 1, /* Branch cost */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
286 };
287
288 static const
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
308 6, /* MOVE_RATIO */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 2, /* Branch cost */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
356 };
357
358 static const
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
378 6, /* MOVE_RATIO */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
404 2, /* Branch cost */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411   /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we
412      ensure the alignment).  For small blocks an inline loop is still a noticeable
413      win; for bigger blocks either rep movsl or rep movsb is the way to go.  Rep
414      movsb apparently has a more expensive startup time, but past 4K the difference
415      is down in the noise.  */
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
433 };
434
435 static const
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
455 4, /* MOVE_RATIO */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
466
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
482 1, /* Branch cost */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
504 };
505
506 static const
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
555 1, /* Branch cost */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
577 };
578
579 static const
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 9, /* MOVE_RATIO */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
625 5, /* Branch cost */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632   /* For some reason, Athlon handles the REP prefix (relative to loops) better
633      than K8 does.  Alignment becomes important after 8 bytes for memcpy and
634      128 bytes for memset.  */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
672 9, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697   /* New AMD processors never drop prefetches; if they cannot be performed
698      immediately, they are queued.  We set the number of simultaneous prefetches
699      to a large constant to reflect this (it is probably not a good idea to leave
700      the number of prefetches completely unlimited, as their execution also takes
701      some time).  */
702 100, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710   /* K8 has an optimized REP instruction for medium-sized blocks, but for very
711      small blocks a loop is better.  For large blocks, a libcall can use
712      nontemporal accesses and beat inlined code considerably.  */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
729 };
730
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
750 9, /* MOVE_RATIO */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
772   /* On K8:
773        MOVD reg64, xmmreg   Double   FSTORE   4
774        MOVD reg32, xmmreg   Double   FSTORE   4
775      On AMDFAM10:
776        MOVD reg64, xmmreg   Double   FADD     3   1/1  1/1
777        MOVD reg32, xmmreg   Double   FADD     3   1/1  1/1  */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783   /* New AMD processors never drop prefetches; if they cannot be performed
784      immediately, they are queued.  We set the number of simultaneous prefetches
785      to a large constant to reflect this (it is probably not a good idea to leave
786      the number of prefetches completely unlimited, as their execution also takes
787      some time).  */
788 100, /* number of parallel prefetches */
789 5, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797   /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
798      very small blocks a loop is better.  For large blocks, a libcall can use
799      nontemporal accesses and beat inlined code considerably.  */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
816 };
817
818 static const
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
838 6, /* MOVE_RATIO */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
864 2, /* Branch cost */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
874 {-1, libcall}}},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
887 };
888
889 static const
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
909 17, /* MOVE_RATIO */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
935 1, /* Branch cost */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
946 {-1, libcall}}},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
960 };
961
962 static const
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
982 16, /* MOVE_RATIO */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991   {4, 4, 4},				/* cost of storing fp registers
 					   in SFmode, DFmode and XFmode */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1035 static const
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038   /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1039      that cost, however, our current implementation of synth_mult ends up using
1040      unnecessary temporary registers, causing regressions on several
1041      SPECfp benchmarks.  */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1086      value is increased to the perhaps more appropriate value of 5.  */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1109 };
1110
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1112 static const
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1180 };
1181
1182 const struct processor_costs *ix86_cost = &pentium_cost;
1183
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1192
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1201
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1204
1205 /* Generic instruction choice should be a common subset of the supported CPUs
1206    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1208
1209 /* Feature tests against the various tunings. */
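/* Each entry below is a bitmask built from the m_* processor bits above; the
   corresponding X86_TUNE_* flag is considered enabled when the bit of the
   processor selected for tuning (-mtune) is set in that mask.  */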
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212      negatively, so enabling it for Generic64 seems like a good code-size
1213      tradeoff.  We can't enable it for 32-bit generic because it does not
1214      work well with PPro-based chips. */
1215 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1216
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1220
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1222 m_486 | m_PENT,
1223
1224 /* X86_TUNE_USE_BIT_TEST */
1225 m_386,
1226
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1232
1233   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to P4 based
1234      on simulation results.  But after P4 shipped, no performance benefit
1235      was observed from branch hints, and they also increase code size.
1236      As a result, icc never generates branch hints. */
1237 0,
1238
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1240 ~m_386,
1241
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1245
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1250
1251   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252      register stalls in the Generic32 compilation setting as well.  However,
1253      in the current implementation partial register stalls are not eliminated
1254      very well - they can be introduced via subregs synthesized by combine
1255      and can happen in caller/callee saving sequences.  Because this option
1256      pays back little on PPro-based chips and conflicts with the partial-reg
1257      dependencies used by Athlon/P4-based chips, it is better to leave it off
1258      for generic32 for now. */
1259 m_PPRO,
1260
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1263
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1266
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1269
1270 /* X86_TUNE_USE_MOV0 */
1271 m_K6,
1272
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1277 m_PENT4,
1278
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1280 m_PPRO,
1281
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1283 ~m_PENT,
1284
1285 /* X86_TUNE_READ_MODIFY */
1286 ~(m_PENT | m_PPRO),
1287
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1291
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1294
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1297
1298 /* X86_TUNE_QIMODE_MATH */
1299 ~0,
1300
1301   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
1303      might be considered for Generic32 if our scheme for avoiding partial
1304      stalls were more effective. */
1305 ~m_PPRO,
1306
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1308 0,
1309
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1311 m_PPRO,
1312
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1315
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1331
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334
1335   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336      conflict here between PPro/Pentium4-based chips that treat 128-bit
1337      SSE registers as single units and K8-based chips that divide SSE
1338      registers into two 64-bit halves.  This knob promotes all store destinations
1339      to be 128-bit to allow register renaming on 128-bit SSE units, but usually
1340      results in one extra microop on 64-bit SSE units.  Experimental results
1341      show that disabling this option on P4 brings over a 20% SPECfp regression,
1342      while enabling it on K8 brings roughly a 2.4% regression that can be partly
1343      masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1345
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1347 m_AMDFAM10,
1348
1349   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350      are resolved on SSE register parts instead of whole registers, so we may
1351      maintain just the lower part of scalar values in the proper format, leaving
1352      the upper part undefined. */
1353 m_ATHLON_K8,
1354
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_AMD_MULTIPLE,
1357
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1360
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1363
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1366
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_SHIFT1 */
1371 ~m_486,
1372
1373 /* X86_TUNE_USE_FFREEP */
1374 m_AMD_MULTIPLE,
1375
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE | m_GENERIC),
1378
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1380 ~(m_AMDFAM10),
1381
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1385
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1388
1389 /* X86_TUNE_USE_BT */
1390 m_AMD_MULTIPLE,
1391
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1394
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1400
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1402 ~m_K8,
1403
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1405 m_K8 | m_GENERIC64,
1406
1407   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1408      and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1409 ~(m_386 | m_486),
1410
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1414
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1416 machines. */
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1420 than a MOV. */
1421 m_PENT,
1422
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1425 m_PENT,
1426
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1430 m_K6,
1431
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1434 m_AMDFAM10,
1435 };
1436
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1441
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1443 ~m_386,
1444
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1446 ~(m_386 | m_486),
1447
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1449 ~m_386,
1450
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1452 ~m_386,
1453 };
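
/* A sketch of how the masks above are consumed (override_options below does
   exactly this): once ix86_arch is known, every entry is ANDed with a
   one-bit mask for the selected processor, so each feature collapses to a
   plain zero/non-zero test.  For example, with -march=k6:

     ix86_arch_mask = 1u << PROCESSOR_K6;
     ix86_arch_features[X86_ARCH_CMOVE] &= ix86_arch_mask;

   leaves X86_ARCH_CMOVE at zero, since m_K6 is excluded from it above, so
   conditional moves are not used for that -march setting.  */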
1454
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1457
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1461
1462 static enum stringop_alg stringop_alg = no_stringop;
1463
1464 /* If the average insn count for a single function invocation is
1465 lower than this constant, emit the fast (but longer) prologue and
1466 epilogue code. */
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1468
1469 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1473
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1476
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1478 {
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1483 /* FP registers */
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1486 /* arg pointer */
1487 NON_Q_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1490 /* SSE registers */
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1492 SSE_REGS, SSE_REGS,
1493 /* MMX registers */
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1495 MMX_REGS, MMX_REGS,
1496 /* REX registers */
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1501 SSE_REGS, SSE_REGS,
1502 };
1503
1504 /* The "default" register map used in 32bit mode. */
1505
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1507 {
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1515 };
1516
1517 static int const x86_64_int_parameter_registers[6] =
1518 {
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1521 };
1522
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1524 {
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1527 };
1528
1529 static int const x86_64_int_return_registers[4] =
1530 {
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1532 };
1533
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1536 {
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1544 };
1545
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1590 numbers.
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1599 */
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1601 {
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1609 };
1610
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1613
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1617
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
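/* A worked example, assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: the register save area is
   6*8 + 8*16 = 176 bytes (48 bytes of integer registers followed by 128
   bytes of SSE registers), matching the x86-64 psABI layout used by va_arg.  */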
1620
1621 /* Define the structure for the machine field in struct function. */
1622
1623 struct stack_local_entry GTY(())
1624 {
1625 unsigned short mode;
1626 unsigned short n;
1627 rtx rtl;
1628 struct stack_local_entry *next;
1629 };
1630
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1633
1634 [arguments]
1635 <- ARG_POINTER
1636 saved pc
1637
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1640 [saved regs]
1641
1642 [padding1] \
1643 )
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1646 [frame] (
1647 )
1648 [padding2] /
1649 */
1650 struct ix86_frame
1651 {
1652 int nregs;
1653 int padding1;
1654 int va_arg_size;
1655 HOST_WIDE_INT frame;
1656 int padding2;
1657 int outgoing_arguments_size;
1658 int red_zone_size;
1659
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1665
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1669 };
1670
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1673 /* Asm dialect. */
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1675 /* TLS dialects. */
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1677
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1680
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune;
1683
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1686
1687 /* True if the SSE prefetch instruction is not a NOP. */
1688 int x86_prefetch_sse;
1689
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1692
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1696
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1699
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1702
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1705
1706 int ix86_section_threshold = 65536;
1707
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1711
1712 /* Fence to use after loop using movnt. */
1713 tree x86_mfence;
1714
1715 /* Register class used for passing a given 64-bit part of the argument.
1716 These represent classes as documented by the psABI, with the exception
1717 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1718 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1719
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (when the upper half contains only padding). */
1722 enum x86_64_reg_class
1723 {
1724 X86_64_NO_CLASS,
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1727 X86_64_SSE_CLASS,
1728 X86_64_SSESF_CLASS,
1729 X86_64_SSEDF_CLASS,
1730 X86_64_SSEUP_CLASS,
1731 X86_64_X87_CLASS,
1732 X86_64_X87UP_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1734 X86_64_MEMORY_CLASS
1735 };
1736 static const char * const x86_64_reg_class_name[] =
1737 {
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1740 };
1741
1742 #define MAX_CLASSES 4
1743
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
1747
1748 \f
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1754 rtx, rtx, int);
1755
1756 \f
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1758 in memory. */
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1761 #endif
1762
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1765
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1769
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1772
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1776
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1788
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1794
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1796
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1799
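/* The UNSET masks above chain together, so disabling one ISA also disables
   everything that depends on it.  For example (a sketch of the intended
   effect, not an exhaustive list): -mno-sse2 clears OPTION_MASK_ISA_SSE2_UNSET,
   which pulls in SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A through the chain of
   *_UNSET definitions, while plain SSE stays enabled; -mno-mmx similarly
   drags 3DNow! and 3DNow!A along with it.  */
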
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1803
1804 /* Implement TARGET_HANDLE_OPTION. */
1805
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1808 {
1809 switch (code)
1810 {
1811 case OPT_mmmx:
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1813 if (!value)
1814 {
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1817 }
1818 return true;
1819
1820 case OPT_m3dnow:
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1822 if (!value)
1823 {
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1826 }
1827 return true;
1828
1829 case OPT_m3dnowa:
1830 return false;
1831
1832 case OPT_msse:
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1834 if (!value)
1835 {
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1838 }
1839 return true;
1840
1841 case OPT_msse2:
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1843 if (!value)
1844 {
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1847 }
1848 return true;
1849
1850 case OPT_msse3:
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1852 if (!value)
1853 {
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1856 }
1857 return true;
1858
1859 case OPT_mssse3:
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1861 if (!value)
1862 {
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1865 }
1866 return true;
1867
1868 case OPT_msse4_1:
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1870 if (!value)
1871 {
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1874 }
1875 return true;
1876
1877 case OPT_msse4_2:
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1879 if (!value)
1880 {
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1883 }
1884 return true;
1885
1886 case OPT_msse4:
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1889 return true;
1890
1891 case OPT_mno_sse4:
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1894 return true;
1895
1896 case OPT_msse4a:
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1898 if (!value)
1899 {
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1902 }
1903 return true;
1904
1905 case OPT_msse5:
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1907 if (!value)
1908 {
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1911 }
1912 return true;
1913
1914 default:
1915 return true;
1916 }
1917 }
1918
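/* A usage sketch of the handler above (the option names are real, the
   scenario is illustrative): with "-mno-ssse3 -march=core2", OPT_mssse3
   records OPTION_MASK_ISA_SSSE3 and its UNSET companions (SSE4.1, SSE4.2,
   SSE4A) in ix86_isa_flags_explicit and clears them from ix86_isa_flags.
   Later, override_options only ORs in the PTA_SSSE3 default for core2 when
   that bit is not already explicit, so the user's -mno-ssse3 wins regardless
   of where it appears on the command line.  */
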
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1923 been parsed.
1924
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1927
1928 void
1929 override_options (void)
1930 {
1931 int i;
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1935
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1938
1939 static struct ptt
1940 {
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1947 }
1948 const processor_target_table[PROCESSOR_max] =
1949 {
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
1964 };
1965
1966 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1967 {
1968 "generic",
1969 "i386",
1970 "i486",
1971 "pentium",
1972 "pentium-mmx",
1973 "pentiumpro",
1974 "pentium2",
1975 "pentium3",
1976 "pentium4",
1977 "pentium-m",
1978 "prescott",
1979 "nocona",
1980 "core2",
1981 "geode",
1982 "k6",
1983 "k6-2",
1984 "k6-3",
1985 "athlon",
1986 "athlon-4",
1987 "k8",
1988 "amdfam10"
1989 };
1990
1991 enum pta_flags
1992 {
1993 PTA_SSE = 1 << 0,
1994 PTA_SSE2 = 1 << 1,
1995 PTA_SSE3 = 1 << 2,
1996 PTA_MMX = 1 << 3,
1997 PTA_PREFETCH_SSE = 1 << 4,
1998 PTA_3DNOW = 1 << 5,
1999 PTA_3DNOW_A = 1 << 6,
2000 PTA_64BIT = 1 << 7,
2001 PTA_SSSE3 = 1 << 8,
2002 PTA_CX16 = 1 << 9,
2003 PTA_POPCNT = 1 << 10,
2004 PTA_ABM = 1 << 11,
2005 PTA_SSE4A = 1 << 12,
2006 PTA_NO_SAHF = 1 << 13,
2007 PTA_SSE4_1 = 1 << 14,
2008 PTA_SSE4_2 = 1 << 15,
2009 PTA_SSE5 = 1 << 16
2010 };
2011
2012 static struct pta
2013 {
2014 const char *const name; /* processor name or nickname. */
2015 const enum processor_type processor;
2016 const unsigned /*enum pta_flags*/ flags;
2017 }
2018 const processor_alias_table[] =
2019 {
2020 {"i386", PROCESSOR_I386, 0},
2021 {"i486", PROCESSOR_I486, 0},
2022 {"i586", PROCESSOR_PENTIUM, 0},
2023 {"pentium", PROCESSOR_PENTIUM, 0},
2024 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2025 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2026 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2027 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2028 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2029 {"i686", PROCESSOR_PENTIUMPRO, 0},
2030 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2031 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2032 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2033 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2034 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2035 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2036 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2037 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2038 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2039 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2040 | PTA_CX16 | PTA_NO_SAHF)},
2041 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2042 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2043 | PTA_SSSE3
2044 | PTA_CX16)},
2045 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2046 |PTA_PREFETCH_SSE)},
2047 {"k6", PROCESSOR_K6, PTA_MMX},
2048 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2049 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2050 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2051 | PTA_PREFETCH_SSE)},
2052 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2053 | PTA_PREFETCH_SSE)},
2054 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2055 | PTA_SSE)},
2056 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2057 | PTA_SSE)},
2058 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2059 | PTA_SSE)},
2060 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2061 | PTA_MMX | PTA_SSE | PTA_SSE2
2062 | PTA_NO_SAHF)},
2063 {"k8", PROCESSOR_K8, (PTA_64BIT
2064 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2065 | PTA_SSE | PTA_SSE2
2066 | PTA_NO_SAHF)},
2067 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2068 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2069 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2070 | PTA_NO_SAHF)},
2071 {"opteron", PROCESSOR_K8, (PTA_64BIT
2072 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2073 | PTA_SSE | PTA_SSE2
2074 | PTA_NO_SAHF)},
2075 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2076 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2077 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2078 | PTA_NO_SAHF)},
2079 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2080 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2081 | PTA_SSE | PTA_SSE2
2082 | PTA_NO_SAHF)},
2083 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2084 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2085 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2086 | PTA_NO_SAHF)},
2087 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2088 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2089 | PTA_SSE | PTA_SSE2
2090 | PTA_NO_SAHF)},
2091 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2092 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2093 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2094 | PTA_SSE4A
2095 | PTA_CX16 | PTA_ABM)},
2096 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2097 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2098 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2099 | PTA_SSE4A
2100 | PTA_CX16 | PTA_ABM)},
2101 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2102 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2103 };
2104
2105 int const pta_size = ARRAY_SIZE (processor_alias_table);
2106
2107 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2108 SUBTARGET_OVERRIDE_OPTIONS;
2109 #endif
2110
2111 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2112 SUBSUBTARGET_OVERRIDE_OPTIONS;
2113 #endif
2114
2115 /* -fPIC is the default for x86_64. */
2116 if (TARGET_MACHO && TARGET_64BIT)
2117 flag_pic = 2;
2118
2119 /* Set the default values for switches whose default depends on TARGET_64BIT
2120 in case they weren't overwritten by command line options. */
2121 if (TARGET_64BIT)
2122 {
2123 /* Mach-O doesn't support omitting the frame pointer for now. */
2124 if (flag_omit_frame_pointer == 2)
2125 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2126 if (flag_asynchronous_unwind_tables == 2)
2127 flag_asynchronous_unwind_tables = 1;
2128 if (flag_pcc_struct_return == 2)
2129 flag_pcc_struct_return = 0;
2130 }
2131 else
2132 {
2133 if (flag_omit_frame_pointer == 2)
2134 flag_omit_frame_pointer = 0;
2135 if (flag_asynchronous_unwind_tables == 2)
2136 flag_asynchronous_unwind_tables = 0;
2137 if (flag_pcc_struct_return == 2)
2138 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2139 }
2140
2141 /* Need to check -mtune=generic first. */
2142 if (ix86_tune_string)
2143 {
2144 if (!strcmp (ix86_tune_string, "generic")
2145 || !strcmp (ix86_tune_string, "i686")
2146 /* As special support for cross compilers we read -mtune=native
2147 as -mtune=generic. With native compilers we won't see the
2148 -mtune=native, as it was changed by the driver. */
2149 || !strcmp (ix86_tune_string, "native"))
2150 {
2151 if (TARGET_64BIT)
2152 ix86_tune_string = "generic64";
2153 else
2154 ix86_tune_string = "generic32";
2155 }
2156 else if (!strncmp (ix86_tune_string, "generic", 7))
2157 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2158 }
2159 else
2160 {
2161 if (ix86_arch_string)
2162 ix86_tune_string = ix86_arch_string;
2163 if (!ix86_tune_string)
2164 {
2165 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2166 ix86_tune_defaulted = 1;
2167 }
2168
2169 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2170 need to use a sensible tune option. */
2171 if (!strcmp (ix86_tune_string, "generic")
2172 || !strcmp (ix86_tune_string, "x86-64")
2173 || !strcmp (ix86_tune_string, "i686"))
2174 {
2175 if (TARGET_64BIT)
2176 ix86_tune_string = "generic64";
2177 else
2178 ix86_tune_string = "generic32";
2179 }
2180 }
2181 if (ix86_stringop_string)
2182 {
2183 if (!strcmp (ix86_stringop_string, "rep_byte"))
2184 stringop_alg = rep_prefix_1_byte;
2185 else if (!strcmp (ix86_stringop_string, "libcall"))
2186 stringop_alg = libcall;
2187 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2188 stringop_alg = rep_prefix_4_byte;
2189 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2190 stringop_alg = rep_prefix_8_byte;
2191 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2192 stringop_alg = loop_1_byte;
2193 else if (!strcmp (ix86_stringop_string, "loop"))
2194 stringop_alg = loop;
2195 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2196 stringop_alg = unrolled_loop;
2197 else
2198 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2199 }
2200 if (!strcmp (ix86_tune_string, "x86-64"))
2201 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2202 "-mtune=generic instead as appropriate.");
2203
2204 if (!ix86_arch_string)
2205 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2206 else
2207 ix86_arch_specified = 1;
2208
2209 if (!strcmp (ix86_arch_string, "generic"))
2210 error ("generic CPU can be used only for -mtune= switch");
2211 if (!strncmp (ix86_arch_string, "generic", 7))
2212 error ("bad value (%s) for -march= switch", ix86_arch_string);
2213
2214 if (ix86_cmodel_string != 0)
2215 {
2216 if (!strcmp (ix86_cmodel_string, "small"))
2217 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2218 else if (!strcmp (ix86_cmodel_string, "medium"))
2219 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2220 else if (!strcmp (ix86_cmodel_string, "large"))
2221 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2222 else if (flag_pic)
2223 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2224 else if (!strcmp (ix86_cmodel_string, "32"))
2225 ix86_cmodel = CM_32;
2226 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2227 ix86_cmodel = CM_KERNEL;
2228 else
2229 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2230 }
2231 else
2232 {
2233 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2234 use of rip-relative addressing. This eliminates fixups that
2235 would otherwise be needed if this object is to be placed in a
2236 DLL, and is essentially just as efficient as direct addressing. */
2237 if (TARGET_64BIT_MS_ABI)
2238 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2239 else if (TARGET_64BIT)
2240 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2241 else
2242 ix86_cmodel = CM_32;
2243 }
2244 if (ix86_asm_string != 0)
2245 {
2246 if (! TARGET_MACHO
2247 && !strcmp (ix86_asm_string, "intel"))
2248 ix86_asm_dialect = ASM_INTEL;
2249 else if (!strcmp (ix86_asm_string, "att"))
2250 ix86_asm_dialect = ASM_ATT;
2251 else
2252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2253 }
2254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2255 error ("code model %qs not supported in the %s bit mode",
2256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2257 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2258 sorry ("%i-bit mode not compiled in",
2259 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2260
2261 for (i = 0; i < pta_size; i++)
2262 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2263 {
2264 ix86_arch = processor_alias_table[i].processor;
2265 /* Default cpu tuning to the architecture. */
2266 ix86_tune = ix86_arch;
2267
2268 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2269 error ("CPU you selected does not support x86-64 "
2270 "instruction set");
2271
2272 if (processor_alias_table[i].flags & PTA_MMX
2273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2274 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2275 if (processor_alias_table[i].flags & PTA_3DNOW
2276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2278 if (processor_alias_table[i].flags & PTA_3DNOW_A
2279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2280 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2281 if (processor_alias_table[i].flags & PTA_SSE
2282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2284 if (processor_alias_table[i].flags & PTA_SSE2
2285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2287 if (processor_alias_table[i].flags & PTA_SSE3
2288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2289 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2290 if (processor_alias_table[i].flags & PTA_SSSE3
2291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2292 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2293 if (processor_alias_table[i].flags & PTA_SSE4_1
2294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2296 if (processor_alias_table[i].flags & PTA_SSE4_2
2297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2299 if (processor_alias_table[i].flags & PTA_SSE4A
2300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2302 if (processor_alias_table[i].flags & PTA_SSE5
2303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2304 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2305
2306 if (processor_alias_table[i].flags & PTA_ABM)
2307 x86_abm = true;
2308 if (processor_alias_table[i].flags & PTA_CX16)
2309 x86_cmpxchg16b = true;
2310 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2311 x86_popcnt = true;
2312 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2313 x86_prefetch_sse = true;
2314 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2315 x86_sahf = true;
2316
2317 break;
2318 }
2319
2320 if (i == pta_size)
2321 error ("bad value (%s) for -march= switch", ix86_arch_string);
2322
2323 ix86_arch_mask = 1u << ix86_arch;
2324 for (i = 0; i < X86_ARCH_LAST; ++i)
2325 ix86_arch_features[i] &= ix86_arch_mask;
2326
2327 for (i = 0; i < pta_size; i++)
2328 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2329 {
2330 ix86_tune = processor_alias_table[i].processor;
2331 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2332 {
2333 if (ix86_tune_defaulted)
2334 {
2335 ix86_tune_string = "x86-64";
2336 for (i = 0; i < pta_size; i++)
2337 if (! strcmp (ix86_tune_string,
2338 processor_alias_table[i].name))
2339 break;
2340 ix86_tune = processor_alias_table[i].processor;
2341 }
2342 else
2343 error ("CPU you selected does not support x86-64 "
2344 "instruction set");
2345 }
2346 /* Intel CPUs have always interpreted SSE prefetch instructions as
2347 NOPs; so, we can enable SSE prefetch instructions even when
2348 -mtune (rather than -march) points us to a processor that has them.
2349 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2350 higher processors. */
2351 if (TARGET_CMOVE
2352 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2353 x86_prefetch_sse = true;
2354 break;
2355 }
2356 if (i == pta_size)
2357 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2358
2359 ix86_tune_mask = 1u << ix86_tune;
2360 for (i = 0; i < X86_TUNE_LAST; ++i)
2361 ix86_tune_features[i] &= ix86_tune_mask;
2362
2363 if (optimize_size)
2364 ix86_cost = &size_cost;
2365 else
2366 ix86_cost = processor_target_table[ix86_tune].cost;
2367
2368 /* Arrange to set up i386_stack_locals for all functions. */
2369 init_machine_status = ix86_init_machine_status;
2370
2371 /* Validate -mregparm= value. */
2372 if (ix86_regparm_string)
2373 {
2374 if (TARGET_64BIT)
2375 warning (0, "-mregparm is ignored in 64-bit mode");
2376 i = atoi (ix86_regparm_string);
2377 if (i < 0 || i > REGPARM_MAX)
2378 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2379 else
2380 ix86_regparm = i;
2381 }
2382 if (TARGET_64BIT)
2383 ix86_regparm = REGPARM_MAX;
2384
2385 /* If the user has provided any of the -malign-* options,
2386 warn and use that value only if -falign-* is not set.
2387 Remove this code in GCC 3.2 or later. */
2388 if (ix86_align_loops_string)
2389 {
2390 warning (0, "-malign-loops is obsolete, use -falign-loops");
2391 if (align_loops == 0)
2392 {
2393 i = atoi (ix86_align_loops_string);
2394 if (i < 0 || i > MAX_CODE_ALIGN)
2395 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2396 else
2397 align_loops = 1 << i;
2398 }
2399 }
2400
2401 if (ix86_align_jumps_string)
2402 {
2403 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2404 if (align_jumps == 0)
2405 {
2406 i = atoi (ix86_align_jumps_string);
2407 if (i < 0 || i > MAX_CODE_ALIGN)
2408 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2409 else
2410 align_jumps = 1 << i;
2411 }
2412 }
2413
2414 if (ix86_align_funcs_string)
2415 {
2416 warning (0, "-malign-functions is obsolete, use -falign-functions");
2417 if (align_functions == 0)
2418 {
2419 i = atoi (ix86_align_funcs_string);
2420 if (i < 0 || i > MAX_CODE_ALIGN)
2421 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2422 else
2423 align_functions = 1 << i;
2424 }
2425 }
2426
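/* For example: "-malign-loops=4" with -falign-loops left unset ends up as
   align_loops = 1 << 4 = 16 bytes, and any value outside 0..MAX_CODE_ALIGN
   (16) is rejected with an error; the same 1 << i scaling applies to the
   -malign-jumps and -malign-functions compatibility options above.  */
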
2427 /* Default align_* from the processor table. */
2428 if (align_loops == 0)
2429 {
2430 align_loops = processor_target_table[ix86_tune].align_loop;
2431 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2432 }
2433 if (align_jumps == 0)
2434 {
2435 align_jumps = processor_target_table[ix86_tune].align_jump;
2436 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2437 }
2438 if (align_functions == 0)
2439 {
2440 align_functions = processor_target_table[ix86_tune].align_func;
2441 }
2442
2443 /* Validate -mbranch-cost= value, or provide default. */
2444 ix86_branch_cost = ix86_cost->branch_cost;
2445 if (ix86_branch_cost_string)
2446 {
2447 i = atoi (ix86_branch_cost_string);
2448 if (i < 0 || i > 5)
2449 error ("-mbranch-cost=%d is not between 0 and 5", i);
2450 else
2451 ix86_branch_cost = i;
2452 }
2453 if (ix86_section_threshold_string)
2454 {
2455 i = atoi (ix86_section_threshold_string);
2456 if (i < 0)
2457 error ("-mlarge-data-threshold=%d is negative", i);
2458 else
2459 ix86_section_threshold = i;
2460 }
2461
2462 if (ix86_tls_dialect_string)
2463 {
2464 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2465 ix86_tls_dialect = TLS_DIALECT_GNU;
2466 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2467 ix86_tls_dialect = TLS_DIALECT_GNU2;
2468 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2469 ix86_tls_dialect = TLS_DIALECT_SUN;
2470 else
2471 error ("bad value (%s) for -mtls-dialect= switch",
2472 ix86_tls_dialect_string);
2473 }
2474
2475 if (ix87_precision_string)
2476 {
2477 i = atoi (ix87_precision_string);
2478 if (i != 32 && i != 64 && i != 80)
2479 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2480 }
2481
2482 if (TARGET_64BIT)
2483 {
2484 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2485
2486 /* Enable by default the SSE and MMX builtins. Do allow the user to
2487 explicitly disable any of these. In particular, disabling SSE and
2488 MMX for kernel code is extremely useful. */
2489 if (!ix86_arch_specified)
2490 ix86_isa_flags
2491 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2492 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2493
2494 if (TARGET_RTD)
2495 warning (0, "-mrtd is ignored in 64bit mode");
2496 }
2497 else
2498 {
2499 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2500
2501 if (!ix86_arch_specified)
2502 ix86_isa_flags
2503 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2504
2505 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2506 when the programmer takes care to keep the stack from being destroyed. */
2507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2508 target_flags |= MASK_NO_RED_ZONE;
2509 }
2510
2511 /* Keep nonleaf frame pointers. */
2512 if (flag_omit_frame_pointer)
2513 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2514 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2515 flag_omit_frame_pointer = 1;
2516
2517 /* If we're doing fast math, we don't care about comparison order
2518 wrt NaNs. This lets us use a shorter comparison sequence. */
2519 if (flag_finite_math_only)
2520 target_flags &= ~MASK_IEEE_FP;
2521
2522 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2523 since the insns won't need emulation. */
2524 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2525 target_flags &= ~MASK_NO_FANCY_MATH_387;
2526
2527 /* Likewise, if the target doesn't have a 387, or we've specified
2528 software floating point, don't use 387 inline intrinsics. */
2529 if (!TARGET_80387)
2530 target_flags |= MASK_NO_FANCY_MATH_387;
2531
2532 /* Turn on SSE4A builtins for -msse5. */
2533 if (TARGET_SSE5)
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2535
2536 /* Turn on SSE4.1 builtins for -msse4.2. */
2537 if (TARGET_SSE4_2)
2538 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2539
2540 /* Turn on SSSE3 builtins for -msse4.1. */
2541 if (TARGET_SSE4_1)
2542 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2543
2544 /* Turn on SSE3 builtins for -mssse3. */
2545 if (TARGET_SSSE3)
2546 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2547
2548 /* Turn on SSE3 builtins for -msse4a. */
2549 if (TARGET_SSE4A)
2550 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2551
2552 /* Turn on SSE2 builtins for -msse3. */
2553 if (TARGET_SSE3)
2554 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2555
2556 /* Turn on SSE builtins for -msse2. */
2557 if (TARGET_SSE2)
2558 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2559
2560 /* Turn on MMX builtins for -msse. */
2561 if (TARGET_SSE)
2562 {
2563 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2564 x86_prefetch_sse = true;
2565 }
2566
2567 /* Turn on MMX builtins for 3Dnow. */
2568 if (TARGET_3DNOW)
2569 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2570
2571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2572 if (TARGET_SSE4_2 || TARGET_ABM)
2573 x86_popcnt = true;
2574
2575 /* Validate -mpreferred-stack-boundary= value, or provide default.
2576 The default of 128 bits is for Pentium III's SSE __m128. We can't
2577 lower it for optimize_size, because otherwise we couldn't mix object
2578 files compiled with -Os and -On. */
2579 ix86_preferred_stack_boundary = 128;
2580 if (ix86_preferred_stack_boundary_string)
2581 {
2582 i = atoi (ix86_preferred_stack_boundary_string);
2583 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2584 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2585 TARGET_64BIT ? 4 : 2);
2586 else
2587 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2588 }
2589
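/* Worked example of the scaling above: "-mpreferred-stack-boundary=4" gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the same 16-byte
   alignment as the default; the smallest accepted value is 4 (16 bytes) in
   64-bit mode and 2 (4 bytes) in 32-bit mode, per the range check above.  */
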
2590 /* Accept -msseregparm only if at least SSE support is enabled. */
2591 if (TARGET_SSEREGPARM
2592 && ! TARGET_SSE)
2593 error ("-msseregparm used without SSE enabled");
2594
2595 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2596 if (ix86_fpmath_string != 0)
2597 {
2598 if (! strcmp (ix86_fpmath_string, "387"))
2599 ix86_fpmath = FPMATH_387;
2600 else if (! strcmp (ix86_fpmath_string, "sse"))
2601 {
2602 if (!TARGET_SSE)
2603 {
2604 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2605 ix86_fpmath = FPMATH_387;
2606 }
2607 else
2608 ix86_fpmath = FPMATH_SSE;
2609 }
2610 else if (! strcmp (ix86_fpmath_string, "387,sse")
2611 || ! strcmp (ix86_fpmath_string, "sse,387"))
2612 {
2613 if (!TARGET_SSE)
2614 {
2615 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2616 ix86_fpmath = FPMATH_387;
2617 }
2618 else if (!TARGET_80387)
2619 {
2620 warning (0, "387 instruction set disabled, using SSE arithmetics");
2621 ix86_fpmath = FPMATH_SSE;
2622 }
2623 else
2624 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2625 }
2626 else
2627 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2628 }
2629
2630 /* If the i387 is disabled, then do not return values in it. */
2631 if (!TARGET_80387)
2632 target_flags &= ~MASK_FLOAT_RETURNS;
2633
2634 /* Use external vectorized library in vectorizing intrinsics. */
2635 if (ix86_veclibabi_string)
2636 {
2637 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2638 ix86_veclib_handler = ix86_veclibabi_acml;
2639 else
2640 error ("unknown vectorization library ABI type (%s) for "
2641 "-mveclibabi= switch", ix86_veclibabi_string);
2642 }
2643
2644 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2645 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2646 && !optimize_size)
2647 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2648
2649 /* ??? Unwind info is not correct around the CFG unless either a frame
2650 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2651 unwind info generation to be aware of the CFG and propagating states
2652 around edges. */
2653 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2654 || flag_exceptions || flag_non_call_exceptions)
2655 && flag_omit_frame_pointer
2656 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2657 {
2658 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2659 warning (0, "unwind tables currently require either a frame pointer "
2660 "or -maccumulate-outgoing-args for correctness");
2661 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2662 }
2663
2664 /* For sane SSE instruction set generation we need the fcomi instruction.
2665 It is safe to enable all CMOVE instructions. */
2666 if (TARGET_SSE)
2667 TARGET_CMOVE = 1;
2668
2669 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2670 {
2671 char *p;
2672 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2673 p = strchr (internal_label_prefix, 'X');
2674 internal_label_prefix_len = p - internal_label_prefix;
2675 *p = '\0';
2676 }
2677
2678 /* When scheduling description is not available, disable scheduler pass
2679 so it won't slow down the compilation and make x87 code slower. */
2680 if (!TARGET_SCHEDULE)
2681 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2682
2683 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2684 set_param_value ("simultaneous-prefetches",
2685 ix86_cost->simultaneous_prefetches);
2686 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2687 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2688 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2689 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2690 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2691 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2692
2693 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2694 can be optimized to ap = __builtin_next_arg (0). */
2695 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2696 targetm.expand_builtin_va_start = NULL;
2697 }
2698 \f
2699 /* Return true if this goes in large data/bss. */
2700
2701 static bool
2702 ix86_in_large_data_p (tree exp)
2703 {
2704 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2705 return false;
2706
2707 /* Functions are never large data. */
2708 if (TREE_CODE (exp) == FUNCTION_DECL)
2709 return false;
2710
2711 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2712 {
2713 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2714 if (strcmp (section, ".ldata") == 0
2715 || strcmp (section, ".lbss") == 0)
2716 return true;
2717 return false;
2718 }
2719 else
2720 {
2721 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2722
2723 /* If this is an incomplete type with size 0, then we can't put it
2724 in data because it might be too big when completed. */
2725 if (!size || size > ix86_section_threshold)
2726 return true;
2727 }
2728
2729 return false;
2730 }
2731
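/* An illustrative case (values from the defaults above): with -mcmodel=medium
   and the default -mlarge-data-threshold of 65536, a 100000-byte array is
   treated as large data and ends up in .ldata/.lbss via the section hooks
   below, while a FUNCTION_DECL or an object at or below the threshold stays
   in the normal sections.  */
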
2732 /* Switch to the appropriate section for output of DECL.
2733 DECL is either a `VAR_DECL' node or a constant of some sort.
2734 RELOC indicates whether forming the initial value of DECL requires
2735 link-time relocations. */
2736
2737 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2738 ATTRIBUTE_UNUSED;
2739
2740 static section *
2741 x86_64_elf_select_section (tree decl, int reloc,
2742 unsigned HOST_WIDE_INT align)
2743 {
2744 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2745 && ix86_in_large_data_p (decl))
2746 {
2747 const char *sname = NULL;
2748 unsigned int flags = SECTION_WRITE;
2749 switch (categorize_decl_for_section (decl, reloc))
2750 {
2751 case SECCAT_DATA:
2752 sname = ".ldata";
2753 break;
2754 case SECCAT_DATA_REL:
2755 sname = ".ldata.rel";
2756 break;
2757 case SECCAT_DATA_REL_LOCAL:
2758 sname = ".ldata.rel.local";
2759 break;
2760 case SECCAT_DATA_REL_RO:
2761 sname = ".ldata.rel.ro";
2762 break;
2763 case SECCAT_DATA_REL_RO_LOCAL:
2764 sname = ".ldata.rel.ro.local";
2765 break;
2766 case SECCAT_BSS:
2767 sname = ".lbss";
2768 flags |= SECTION_BSS;
2769 break;
2770 case SECCAT_RODATA:
2771 case SECCAT_RODATA_MERGE_STR:
2772 case SECCAT_RODATA_MERGE_STR_INIT:
2773 case SECCAT_RODATA_MERGE_CONST:
2774 sname = ".lrodata";
2775 flags = 0;
2776 break;
2777 case SECCAT_SRODATA:
2778 case SECCAT_SDATA:
2779 case SECCAT_SBSS:
2780 gcc_unreachable ();
2781 case SECCAT_TEXT:
2782 case SECCAT_TDATA:
2783 case SECCAT_TBSS:
2784 /* We don't split these for the medium model. Place them into
2785 default sections and hope for the best. */
2786 break;
2787 }
2788 if (sname)
2789 {
2790 /* We might get called with string constants, but get_named_section
2791 doesn't like them as they are not DECLs. Also, we need to set
2792 flags in that case. */
2793 if (!DECL_P (decl))
2794 return get_section (sname, flags, NULL);
2795 return get_named_section (decl, sname, reloc);
2796 }
2797 }
2798 return default_elf_select_section (decl, reloc, align);
2799 }
2800
2801 /* Build up a unique section name, expressed as a
2802 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2803 RELOC indicates whether the initial value of EXP requires
2804 link-time relocations. */
2805
2806 static void ATTRIBUTE_UNUSED
2807 x86_64_elf_unique_section (tree decl, int reloc)
2808 {
2809 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2810 && ix86_in_large_data_p (decl))
2811 {
2812 const char *prefix = NULL;
2813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2814 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2815
2816 switch (categorize_decl_for_section (decl, reloc))
2817 {
2818 case SECCAT_DATA:
2819 case SECCAT_DATA_REL:
2820 case SECCAT_DATA_REL_LOCAL:
2821 case SECCAT_DATA_REL_RO:
2822 case SECCAT_DATA_REL_RO_LOCAL:
2823 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2824 break;
2825 case SECCAT_BSS:
2826 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2827 break;
2828 case SECCAT_RODATA:
2829 case SECCAT_RODATA_MERGE_STR:
2830 case SECCAT_RODATA_MERGE_STR_INIT:
2831 case SECCAT_RODATA_MERGE_CONST:
2832 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2833 break;
2834 case SECCAT_SRODATA:
2835 case SECCAT_SDATA:
2836 case SECCAT_SBSS:
2837 gcc_unreachable ();
2838 case SECCAT_TEXT:
2839 case SECCAT_TDATA:
2840 case SECCAT_TBSS:
2841 /* We don't split these for the medium model. Place them into
2842 default sections and hope for the best. */
2843 break;
2844 }
2845 if (prefix)
2846 {
2847 const char *name;
2848 size_t nlen, plen;
2849 char *string;
2850 plen = strlen (prefix);
2851
2852 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2853 name = targetm.strip_name_encoding (name);
2854 nlen = strlen (name);
2855
2856 string = (char *) alloca (nlen + plen + 1);
2857 memcpy (string, prefix, plen);
2858 memcpy (string + plen, name, nlen + 1);
2859
2860 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2861 return;
2862 }
2863 }
2864 default_unique_section (decl, reloc);
2865 }
2866
2867 #ifdef COMMON_ASM_OP
2868 /* This says how to output assembler code to declare an
2869 uninitialized external linkage data object.
2870
2871 For medium model x86-64 we need to use the .largecomm directive for
2872 large objects. */
2873 void
2874 x86_elf_aligned_common (FILE *file,
2875 const char *name, unsigned HOST_WIDE_INT size,
2876 int align)
2877 {
2878 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2879 && size > (unsigned int)ix86_section_threshold)
2880 fprintf (file, ".largecomm\t");
2881 else
2882 fprintf (file, "%s", COMMON_ASM_OP);
2883 assemble_name (file, name);
2884 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2885 size, align / BITS_PER_UNIT);
2886 }
2887 #endif
2888
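/* A sketch of the expected output (exact spacing aside): for a 128 KiB
   common object "buf" with 256-bit alignment under -mcmodel=medium, the
   function above emits roughly

       .largecomm	buf,131072,32

   whereas a small object falls back to the ordinary COMMON_ASM_OP form.  */
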
2889 /* Utility function for targets to use in implementing
2890 ASM_OUTPUT_ALIGNED_BSS. */
2891
2892 void
2893 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2894 const char *name, unsigned HOST_WIDE_INT size,
2895 int align)
2896 {
2897 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2898 && size > (unsigned int)ix86_section_threshold)
2899 switch_to_section (get_named_section (decl, ".lbss", 0));
2900 else
2901 switch_to_section (bss_section);
2902 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2903 #ifdef ASM_DECLARE_OBJECT_NAME
2904 last_assemble_variable_decl = decl;
2905 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2906 #else
2907 /* The standard thing is just to output a label for the object. */
2908 ASM_OUTPUT_LABEL (file, name);
2909 #endif /* ASM_DECLARE_OBJECT_NAME */
2910 ASM_OUTPUT_SKIP (file, size ? size : 1);
2911 }
2912 \f
2913 void
2914 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2915 {
2916 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2917 make the problem with not enough registers even worse. */
2918 #ifdef INSN_SCHEDULING
2919 if (level > 1)
2920 flag_schedule_insns = 0;
2921 #endif
2922
2923 if (TARGET_MACHO)
2924 /* The Darwin libraries never set errno, so we might as well
2925 avoid calling them when that's the only reason we would. */
2926 flag_errno_math = 0;
2927
2928 /* The default values of these switches depend on TARGET_64BIT, which is
2929 not known at this point. Mark these values with 2 and let the user
2930 override them. If no command line option specifies them, we will set
2931 the defaults in override_options. */
2932 if (optimize >= 1)
2933 flag_omit_frame_pointer = 2;
2934 flag_pcc_struct_return = 2;
2935 flag_asynchronous_unwind_tables = 2;
2936 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2937 SUBTARGET_OPTIMIZATION_OPTIONS;
2938 #endif
2939 }
2940 \f
2941 /* Decide whether we can make a sibling call to a function. DECL is the
2942 declaration of the function being targeted by the call and EXP is the
2943 CALL_EXPR representing the call. */
2944
2945 static bool
2946 ix86_function_ok_for_sibcall (tree decl, tree exp)
2947 {
2948 tree func;
2949 rtx a, b;
2950
2951 /* If we are generating position-independent code, we cannot sibcall
2952 optimize any indirect call, or a direct call to a global function,
2953 as the PLT requires %ebx be live. */
2954 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2955 return false;
2956
2957 if (decl)
2958 func = decl;
2959 else
2960 {
2961 func = TREE_TYPE (CALL_EXPR_FN (exp));
2962 if (POINTER_TYPE_P (func))
2963 func = TREE_TYPE (func);
2964 }
2965
2966 /* Check that the return value locations are the same. For example,
2967 if we are returning floats on the 80387 register stack, we cannot
2968 make a sibcall from a function that doesn't return a float to a
2969 function that does or, conversely, from a function that does return
2970 a float to a function that doesn't; the necessary stack adjustment
2971 would not be executed. This is also the place we notice
2972 differences in the return value ABI. Note that it is ok for one
2973 of the functions to have void return type as long as the return
2974 value of the other is passed in a register. */
2975 a = ix86_function_value (TREE_TYPE (exp), func, false);
2976 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2977 cfun->decl, false);
2978 if (STACK_REG_P (a) || STACK_REG_P (b))
2979 {
2980 if (!rtx_equal_p (a, b))
2981 return false;
2982 }
2983 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2984 ;
2985 else if (!rtx_equal_p (a, b))
2986 return false;
2987
2988 /* If this call is indirect, we'll need to be able to use a call-clobbered
2989 register for the address of the target function. Make sure that all
2990 such registers are not used for passing parameters. */
2991 if (!decl && !TARGET_64BIT)
2992 {
2993 tree type;
2994
2995 /* We're looking at the CALL_EXPR, we need the type of the function. */
2996 type = CALL_EXPR_FN (exp); /* pointer expression */
2997 type = TREE_TYPE (type); /* pointer type */
2998 type = TREE_TYPE (type); /* function type */
2999
3000 if (ix86_function_regparm (type, NULL) >= 3)
3001 {
3002 /* ??? Need to count the actual number of registers to be used,
3003 not the possible number of registers. Fix later. */
3004 return false;
3005 }
3006 }
3007
3008 /* Dllimport'd functions are also called indirectly. */
3009 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3010 && decl && DECL_DLLIMPORT_P (decl)
3011 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3012 return false;
3013
3014 /* If we force-aligned the stack, then sibcalling would unalign the
3015 stack, which may break the called function. */
3016 if (cfun->machine->force_align_arg_pointer)
3017 return false;
3018
3019 /* Otherwise okay. That also includes certain types of indirect calls. */
3020 return true;
3021 }
3022
3023 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3024 calling convention attributes;
3025 arguments as in struct attribute_spec.handler. */
3026
3027 static tree
3028 ix86_handle_cconv_attribute (tree *node, tree name,
3029 tree args,
3030 int flags ATTRIBUTE_UNUSED,
3031 bool *no_add_attrs)
3032 {
3033 if (TREE_CODE (*node) != FUNCTION_TYPE
3034 && TREE_CODE (*node) != METHOD_TYPE
3035 && TREE_CODE (*node) != FIELD_DECL
3036 && TREE_CODE (*node) != TYPE_DECL)
3037 {
3038 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3039 IDENTIFIER_POINTER (name));
3040 *no_add_attrs = true;
3041 return NULL_TREE;
3042 }
3043
3044 /* Can combine regparm with all attributes but fastcall. */
3045 if (is_attribute_p ("regparm", name))
3046 {
3047 tree cst;
3048
3049 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3050 {
3051 error ("fastcall and regparm attributes are not compatible");
3052 }
3053
3054 cst = TREE_VALUE (args);
3055 if (TREE_CODE (cst) != INTEGER_CST)
3056 {
3057 warning (OPT_Wattributes,
3058 "%qs attribute requires an integer constant argument",
3059 IDENTIFIER_POINTER (name));
3060 *no_add_attrs = true;
3061 }
3062 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3063 {
3064 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3065 IDENTIFIER_POINTER (name), REGPARM_MAX);
3066 *no_add_attrs = true;
3067 }
3068
3069 if (!TARGET_64BIT
3070 && lookup_attribute (ix86_force_align_arg_pointer_string,
3071 TYPE_ATTRIBUTES (*node))
3072 && compare_tree_int (cst, REGPARM_MAX-1))
3073 {
3074 error ("%s functions limited to %d register parameters",
3075 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3076 }
3077
3078 return NULL_TREE;
3079 }
3080
3081 if (TARGET_64BIT)
3082 {
3083 /* Do not warn when emulating the MS ABI. */
3084 if (!TARGET_64BIT_MS_ABI)
3085 warning (OPT_Wattributes, "%qs attribute ignored",
3086 IDENTIFIER_POINTER (name));
3087 *no_add_attrs = true;
3088 return NULL_TREE;
3089 }
3090
3091 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3092 if (is_attribute_p ("fastcall", name))
3093 {
3094 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3095 {
3096 error ("fastcall and cdecl attributes are not compatible");
3097 }
3098 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3099 {
3100 error ("fastcall and stdcall attributes are not compatible");
3101 }
3102 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3103 {
3104 error ("fastcall and regparm attributes are not compatible");
3105 }
3106 }
3107
3108 /* Can combine stdcall with fastcall (redundant), regparm and
3109 sseregparm. */
3110 else if (is_attribute_p ("stdcall", name))
3111 {
3112 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3113 {
3114 error ("stdcall and cdecl attributes are not compatible");
3115 }
3116 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3117 {
3118 error ("stdcall and fastcall attributes are not compatible");
3119 }
3120 }
3121
3122 /* Can combine cdecl with regparm and sseregparm. */
3123 else if (is_attribute_p ("cdecl", name))
3124 {
3125 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3126 {
3127 error ("stdcall and cdecl attributes are not compatible");
3128 }
3129 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3130 {
3131 error ("fastcall and cdecl attributes are not compatible");
3132 }
3133 }
3134
3135 /* Can combine sseregparm with all attributes. */
3136
3137 return NULL_TREE;
3138 }
3139
3140 /* Return 0 if the attributes for two types are incompatible, 1 if they
3141 are compatible, and 2 if they are nearly compatible (which causes a
3142 warning to be generated). */
3143
3144 static int
3145 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3146 {
3147 /* Check for mismatch of non-default calling convention. */
3148 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3149
3150 if (TREE_CODE (type1) != FUNCTION_TYPE
3151 && TREE_CODE (type1) != METHOD_TYPE)
3152 return 1;
3153
3154 /* Check for mismatched fastcall/regparm types. */
3155 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3156 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3157 || (ix86_function_regparm (type1, NULL)
3158 != ix86_function_regparm (type2, NULL)))
3159 return 0;
3160
3161 /* Check for mismatched sseregparm types. */
3162 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3163 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3164 return 0;
3165
3166 /* Check for mismatched return types (cdecl vs stdcall). */
3167 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3168 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3169 return 0;
3170
3171 return 1;
3172 }
3173 \f
3174 /* Return the regparm value for a function with the indicated TYPE and DECL.
3175 DECL may be NULL when calling function indirectly
3176 or considering a libcall. */
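/* A sketch of what this computes (the example declaration is illustrative):

       static int __attribute__ ((regparm (2))) add2 (int a, int b)
       {
         return a + b;
       }

   yields 2 from the explicit attribute; a fastcall type also yields 2.
   For a local function without an attribute, the code below may raise the
   value up to REGPARM_MAX, reduced by any fixed register variables and by
   the need for a static chain or stack realignment register.  */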
3177
3178 static int
3179 ix86_function_regparm (const_tree type, const_tree decl)
3180 {
3181 tree attr;
3182 int regparm = ix86_regparm;
3183
3184 if (TARGET_64BIT)
3185 return regparm;
3186
3187 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3188 if (attr)
3189 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3190
3191 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3192 return 2;
3193
3194 /* Use register calling convention for local functions when possible. */
3195 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3196 && flag_unit_at_a_time && !profile_flag)
3197 {
3198 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3199 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3200 if (i && i->local)
3201 {
3202 int local_regparm, globals = 0, regno;
3203 struct function *f;
3204
3205 /* Make sure no regparm register is taken by a
3206 fixed register variable. */
3207 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3208 if (fixed_regs[local_regparm])
3209 break;
3210
3211 /* We can't use regparm(3) for nested functions as these use
3212 static chain pointer in third argument. */
3213 if (local_regparm == 3
3214 && (decl_function_context (decl)
3215 || ix86_force_align_arg_pointer)
3216 && !DECL_NO_STATIC_CHAIN (decl))
3217 local_regparm = 2;
3218
3219 /* If the function realigns its stack pointer, the prologue will
3220 clobber %ecx. If we've already generated code for the callee,
3221 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3222 scanning the attributes for the self-realigning property. */
3223 f = DECL_STRUCT_FUNCTION (decl);
3224 if (local_regparm == 3
3225 && (f ? !!f->machine->force_align_arg_pointer
3226 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3227 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3228 local_regparm = 2;
3229
3230 /* Each fixed register usage increases register pressure,
3231 so fewer registers should be used for argument passing.
3232 This functionality can be overridden by an explicit
3233 regparm value. */
3234 for (regno = 0; regno <= DI_REG; regno++)
3235 if (fixed_regs[regno])
3236 globals++;
3237
3238 local_regparm
3239 = globals < local_regparm ? local_regparm - globals : 0;
3240
3241 if (local_regparm > regparm)
3242 regparm = local_regparm;
3243 }
3244 }
3245
3246 return regparm;
3247 }
3248
3249 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3250 DFmode (2) arguments in SSE registers for a function with the
3251 indicated TYPE and DECL. DECL may be NULL when calling function
3252 indirectly or considering a libcall. Otherwise return 0. */
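/* Illustrative example (the declaration below is hypothetical):

       double __attribute__ ((sseregparm)) dot (double x, double y);

   returns 2 here when SSE is enabled, so both SFmode and DFmode arguments
   go in SSE registers; without SSE the errors below are emitted instead.
   Local functions compiled with SSE math are given SSE argument passing
   automatically (2 with SSE2, 1 otherwise).  */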
3253
3254 static int
3255 ix86_function_sseregparm (const_tree type, const_tree decl)
3256 {
3257 gcc_assert (!TARGET_64BIT);
3258
3259 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3260 by the sseregparm attribute. */
3261 if (TARGET_SSEREGPARM
3262 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3263 {
3264 if (!TARGET_SSE)
3265 {
3266 if (decl)
3267 error ("Calling %qD with attribute sseregparm without "
3268 "SSE/SSE2 enabled", decl);
3269 else
3270 error ("Calling %qT with attribute sseregparm without "
3271 "SSE/SSE2 enabled", type);
3272 return 0;
3273 }
3274
3275 return 2;
3276 }
3277
3278 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3279 (and DFmode for SSE2) arguments in SSE registers. */
3280 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3281 {
3282 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3283 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3284 if (i && i->local)
3285 return TARGET_SSE2 ? 2 : 1;
3286 }
3287
3288 return 0;
3289 }
3290
3291 /* Return true if EAX is live at the start of the function. Used by
3292 ix86_expand_prologue to determine if we need special help before
3293 calling allocate_stack_worker. */
3294
3295 static bool
3296 ix86_eax_live_at_start_p (void)
3297 {
3298 /* Cheat. Don't bother working forward from ix86_function_regparm
3299 to the function type to whether an actual argument is located in
3300 eax. Instead just look at cfg info, which is still close enough
3301 to correct at this point. This gives false positives for broken
3302 functions that might use uninitialized data that happens to be
3303 allocated in eax, but who cares? */
3304 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3305 }
3306
3307 /* Value is the number of bytes of arguments automatically
3308 popped when returning from a subroutine call.
3309 FUNDECL is the declaration node of the function (as a tree),
3310 FUNTYPE is the data type of the function (as a tree),
3311 or for a library call it is an identifier node for the subroutine name.
3312 SIZE is the number of bytes of arguments passed on the stack.
3313
3314 On the 80386, the RTD insn may be used to pop them if the number
3315 of args is fixed, but if the number is variable then the caller
3316 must pop them all. RTD can't be used for library calls now
3317 because the library is compiled with the Unix compiler.
3318 Use of RTD is a selectable option, since it is incompatible with
3319 standard Unix calling sequences. If the option is not selected,
3320 the caller must always pop the args.
3321
3322 The attribute stdcall is equivalent to RTD on a per module basis. */
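/* Concrete example (illustrative): for

       int __attribute__ ((stdcall)) f (int a, int b);

   the caller pushes 8 bytes of arguments and f pops them itself, so this
   function returns 8.  For a cdecl or variadic function it returns 0 and
   the caller pops the arguments.  */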
3323
3324 int
3325 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3326 {
3327 int rtd;
3328
3329 /* None of the 64-bit ABIs pop arguments. */
3330 if (TARGET_64BIT)
3331 return 0;
3332
3333 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3334
3335 /* Cdecl functions override -mrtd, and never pop the stack. */
3336 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3337 {
3338 /* Stdcall and fastcall functions will pop the stack if not
3339 variable args. */
3340 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3341 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3342 rtd = 1;
3343
3344 if (rtd && ! stdarg_p (funtype))
3345 return size;
3346 }
3347
3348 /* Lose any fake structure return argument if it is passed on the stack. */
3349 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3350 && !KEEP_AGGREGATE_RETURN_POINTER)
3351 {
3352 int nregs = ix86_function_regparm (funtype, fundecl);
3353 if (nregs == 0)
3354 return GET_MODE_SIZE (Pmode);
3355 }
3356
3357 return 0;
3358 }
3359 \f
3360 /* Argument support functions. */
3361
3362 /* Return true when register may be used to pass function parameters. */
3363 bool
3364 ix86_function_arg_regno_p (int regno)
3365 {
3366 int i;
3367 const int *parm_regs;
3368
3369 if (!TARGET_64BIT)
3370 {
3371 if (TARGET_MACHO)
3372 return (regno < REGPARM_MAX
3373 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3374 else
3375 return (regno < REGPARM_MAX
3376 || (TARGET_MMX && MMX_REGNO_P (regno)
3377 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3378 || (TARGET_SSE && SSE_REGNO_P (regno)
3379 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3380 }
3381
3382 if (TARGET_MACHO)
3383 {
3384 if (SSE_REGNO_P (regno) && TARGET_SSE)
3385 return true;
3386 }
3387 else
3388 {
3389 if (TARGET_SSE && SSE_REGNO_P (regno)
3390 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3391 return true;
3392 }
3393
3394 /* RAX is used as hidden argument to va_arg functions. */
3395 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3396 return true;
3397
3398 if (TARGET_64BIT_MS_ABI)
3399 parm_regs = x86_64_ms_abi_int_parameter_registers;
3400 else
3401 parm_regs = x86_64_int_parameter_registers;
3402 for (i = 0; i < REGPARM_MAX; i++)
3403 if (regno == parm_regs[i])
3404 return true;
3405 return false;
3406 }
3407
3408 /* Return true if we do not know how to pass TYPE solely in registers. */
3409
3410 static bool
3411 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3412 {
3413 if (must_pass_in_stack_var_size_or_pad (mode, type))
3414 return true;
3415
3416 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3417 The layout_type routine is crafty and tries to trick us into passing
3418 currently unsupported vector types on the stack by using TImode. */
3419 return (!TARGET_64BIT && mode == TImode
3420 && type && TREE_CODE (type) != VECTOR_TYPE);
3421 }
3422
3423 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3424 for a call to a function whose data type is FNTYPE.
3425 For a library call, FNTYPE is 0. */
3426
3427 void
3428 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3429 tree fntype, /* tree ptr for function decl */
3430 rtx libname, /* SYMBOL_REF of library name or 0 */
3431 tree fndecl)
3432 {
3433 memset (cum, 0, sizeof (*cum));
3434
3435 /* Set up the number of registers to use for passing arguments. */
3436 cum->nregs = ix86_regparm;
3437 if (TARGET_SSE)
3438 cum->sse_nregs = SSE_REGPARM_MAX;
3439 if (TARGET_MMX)
3440 cum->mmx_nregs = MMX_REGPARM_MAX;
3441 cum->warn_sse = true;
3442 cum->warn_mmx = true;
3443 cum->maybe_vaarg = (fntype
3444 ? (!prototype_p (fntype) || stdarg_p (fntype))
3445 : !libname);
3446
3447 if (!TARGET_64BIT)
3448 {
3449 /* If there are variable arguments, then we won't pass anything
3450 in registers in 32-bit mode. */
3451 if (cum->maybe_vaarg)
3452 {
3453 cum->nregs = 0;
3454 cum->sse_nregs = 0;
3455 cum->mmx_nregs = 0;
3456 cum->warn_sse = 0;
3457 cum->warn_mmx = 0;
3458 return;
3459 }
3460
3461 /* Use ecx and edx registers if function has fastcall attribute,
3462 else look for regparm information. */
3463 if (fntype)
3464 {
3465 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3466 {
3467 cum->nregs = 2;
3468 cum->fastcall = 1;
3469 }
3470 else
3471 cum->nregs = ix86_function_regparm (fntype, fndecl);
3472 }
3473
3474 /* Set up the number of SSE registers used for passing SFmode
3475 and DFmode arguments. Warn for mismatching ABI. */
3476 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3477 }
3478 }
3479
3480 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3481 But in the case of vector types, it is some vector mode.
3482
3483 When we have only some of our vector isa extensions enabled, then there
3484 are some modes for which vector_mode_supported_p is false. For these
3485 modes, the generic vector support in gcc will choose some non-vector mode
3486 in order to implement the type. By computing the natural mode, we'll
3487 select the proper ABI location for the operand and not depend on whatever
3488 the middle-end decides to do with these vector types. */
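/* For instance (illustrative), given

       typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE may be some non-vector mode such as TImode or BLKmode when
   SSE is disabled, but the natural mode computed here is still V4SImode,
   so the ABI decision (and the ABI change warnings) do not depend on
   which ISA extensions happen to be enabled.  */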
3489
3490 static enum machine_mode
3491 type_natural_mode (const_tree type)
3492 {
3493 enum machine_mode mode = TYPE_MODE (type);
3494
3495 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3496 {
3497 HOST_WIDE_INT size = int_size_in_bytes (type);
3498 if ((size == 8 || size == 16)
3499 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3500 && TYPE_VECTOR_SUBPARTS (type) > 1)
3501 {
3502 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3503
3504 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3505 mode = MIN_MODE_VECTOR_FLOAT;
3506 else
3507 mode = MIN_MODE_VECTOR_INT;
3508
3509 /* Get the mode which has this inner mode and number of units. */
3510 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3511 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3512 && GET_MODE_INNER (mode) == innermode)
3513 return mode;
3514
3515 gcc_unreachable ();
3516 }
3517 }
3518
3519 return mode;
3520 }
3521
3522 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3523 this may not agree with the mode that the type system has chosen for the
3524 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3525 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3526
3527 static rtx
3528 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3529 unsigned int regno)
3530 {
3531 rtx tmp;
3532
3533 if (orig_mode != BLKmode)
3534 tmp = gen_rtx_REG (orig_mode, regno);
3535 else
3536 {
3537 tmp = gen_rtx_REG (mode, regno);
3538 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3539 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3540 }
3541
3542 return tmp;
3543 }
3544
3545 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
3546 goal of this code is to classify each 8 bytes of an incoming argument by
3547 register class and assign registers accordingly. */
3548
3549 /* Return the union class of CLASS1 and CLASS2.
3550 See the x86-64 PS ABI for details. */
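/* A few sample applications of the rules below (illustrative):

       merge (X86_64_NO_CLASS, X86_64_SSE_CLASS)      -> X86_64_SSE_CLASS
       merge (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS) -> X86_64_INTEGER_CLASS
       merge (X86_64_SSE_CLASS, X86_64_X87_CLASS)     -> X86_64_MEMORY_CLASS  */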
3551
3552 static enum x86_64_reg_class
3553 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3554 {
3555 /* Rule #1: If both classes are equal, this is the resulting class. */
3556 if (class1 == class2)
3557 return class1;
3558
3559 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3560 the other class. */
3561 if (class1 == X86_64_NO_CLASS)
3562 return class2;
3563 if (class2 == X86_64_NO_CLASS)
3564 return class1;
3565
3566 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3567 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3568 return X86_64_MEMORY_CLASS;
3569
3570 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3571 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3572 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3573 return X86_64_INTEGERSI_CLASS;
3574 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3575 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3576 return X86_64_INTEGER_CLASS;
3577
3578 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3579 MEMORY is used. */
3580 if (class1 == X86_64_X87_CLASS
3581 || class1 == X86_64_X87UP_CLASS
3582 || class1 == X86_64_COMPLEX_X87_CLASS
3583 || class2 == X86_64_X87_CLASS
3584 || class2 == X86_64_X87UP_CLASS
3585 || class2 == X86_64_COMPLEX_X87_CLASS)
3586 return X86_64_MEMORY_CLASS;
3587
3588 /* Rule #6: Otherwise class SSE is used. */
3589 return X86_64_SSE_CLASS;
3590 }
3591
3592 /* Classify the argument of type TYPE and mode MODE.
3593 CLASSES will be filled by the register class used to pass each word
3594 of the operand. The number of words is returned. In case the parameter
3595 should be passed in memory, 0 is returned. As a special case for zero
3596 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3597
3598 BIT_OFFSET is used internally for handling records and specifies the
3599 offset, in bits modulo 256, to avoid overflow cases.
3600
3601 See the x86-64 PS ABI for details.
3602 */
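/* As a worked example (illustrative), on x86-64

       struct s { int i; int j; double d; };

   is 16 bytes: the first eightbyte (i and j) classifies as
   X86_64_INTEGER_CLASS and the second (d) as X86_64_SSEDF_CLASS, so the
   struct ends up split between one integer register and one SSE register.
   A structure larger than 16 bytes is classified as memory and this
   function returns 0.  */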
3603
3604 static int
3605 classify_argument (enum machine_mode mode, const_tree type,
3606 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3607 {
3608 HOST_WIDE_INT bytes =
3609 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3610 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3611
3612 /* Variable sized entities are always passed/returned in memory. */
3613 if (bytes < 0)
3614 return 0;
3615
3616 if (mode != VOIDmode
3617 && targetm.calls.must_pass_in_stack (mode, type))
3618 return 0;
3619
3620 if (type && AGGREGATE_TYPE_P (type))
3621 {
3622 int i;
3623 tree field;
3624 enum x86_64_reg_class subclasses[MAX_CLASSES];
3625
3626 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3627 if (bytes > 16)
3628 return 0;
3629
3630 for (i = 0; i < words; i++)
3631 classes[i] = X86_64_NO_CLASS;
3632
3633 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3634 signal the memory class, so handle this as a special case. */
3635 if (!words)
3636 {
3637 classes[0] = X86_64_NO_CLASS;
3638 return 1;
3639 }
3640
3641 /* Classify each field of record and merge classes. */
3642 switch (TREE_CODE (type))
3643 {
3644 case RECORD_TYPE:
3645 /* And now merge the fields of structure. */
3646 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3647 {
3648 if (TREE_CODE (field) == FIELD_DECL)
3649 {
3650 int num;
3651
3652 if (TREE_TYPE (field) == error_mark_node)
3653 continue;
3654
3655 /* Bitfields are always classified as integer. Handle them
3656 early, since later code would consider them to be
3657 misaligned integers. */
3658 if (DECL_BIT_FIELD (field))
3659 {
3660 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3661 i < ((int_bit_position (field) + (bit_offset % 64))
3662 + tree_low_cst (DECL_SIZE (field), 0)
3663 + 63) / 8 / 8; i++)
3664 classes[i] =
3665 merge_classes (X86_64_INTEGER_CLASS,
3666 classes[i]);
3667 }
3668 else
3669 {
3670 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3671 TREE_TYPE (field), subclasses,
3672 (int_bit_position (field)
3673 + bit_offset) % 256);
3674 if (!num)
3675 return 0;
3676 for (i = 0; i < num; i++)
3677 {
3678 int pos =
3679 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3680 classes[i + pos] =
3681 merge_classes (subclasses[i], classes[i + pos]);
3682 }
3683 }
3684 }
3685 }
3686 break;
3687
3688 case ARRAY_TYPE:
3689 /* Arrays are handled as small records. */
3690 {
3691 int num;
3692 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3693 TREE_TYPE (type), subclasses, bit_offset);
3694 if (!num)
3695 return 0;
3696
3697 /* The partial classes are now full classes. */
3698 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3699 subclasses[0] = X86_64_SSE_CLASS;
3700 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3701 subclasses[0] = X86_64_INTEGER_CLASS;
3702
3703 for (i = 0; i < words; i++)
3704 classes[i] = subclasses[i % num];
3705
3706 break;
3707 }
3708 case UNION_TYPE:
3709 case QUAL_UNION_TYPE:
3710 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3712 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3713 {
3714 if (TREE_CODE (field) == FIELD_DECL)
3715 {
3716 int num;
3717
3718 if (TREE_TYPE (field) == error_mark_node)
3719 continue;
3720
3721 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3722 TREE_TYPE (field), subclasses,
3723 bit_offset);
3724 if (!num)
3725 return 0;
3726 for (i = 0; i < num; i++)
3727 classes[i] = merge_classes (subclasses[i], classes[i]);
3728 }
3729 }
3730 break;
3731
3732 default:
3733 gcc_unreachable ();
3734 }
3735
3736 /* Final merger cleanup. */
3737 for (i = 0; i < words; i++)
3738 {
3739 /* If one class is MEMORY, everything should be passed in
3740 memory. */
3741 if (classes[i] == X86_64_MEMORY_CLASS)
3742 return 0;
3743
3744 /* The X86_64_SSEUP_CLASS should be always preceded by
3745 X86_64_SSE_CLASS. */
3746 if (classes[i] == X86_64_SSEUP_CLASS
3747 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3748 classes[i] = X86_64_SSE_CLASS;
3749
3750 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3751 if (classes[i] == X86_64_X87UP_CLASS
3752 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3753 classes[i] = X86_64_SSE_CLASS;
3754 }
3755 return words;
3756 }
3757
3758 /* Compute the alignment needed. We align all types to their natural
3759 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
3760 if (mode != VOIDmode && mode != BLKmode)
3761 {
3762 int mode_alignment = GET_MODE_BITSIZE (mode);
3763
3764 if (mode == XFmode)
3765 mode_alignment = 128;
3766 else if (mode == XCmode)
3767 mode_alignment = 256;
3768 if (COMPLEX_MODE_P (mode))
3769 mode_alignment /= 2;
3770 /* Misaligned fields are always returned in memory. */
3771 if (bit_offset % mode_alignment)
3772 return 0;
3773 }
3774
3775 /* for V1xx modes, just use the base mode */
3776 if (VECTOR_MODE_P (mode)
3777 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3778 mode = GET_MODE_INNER (mode);
3779
3780 /* Classification of atomic types. */
3781 switch (mode)
3782 {
3783 case SDmode:
3784 case DDmode:
3785 classes[0] = X86_64_SSE_CLASS;
3786 return 1;
3787 case TDmode:
3788 classes[0] = X86_64_SSE_CLASS;
3789 classes[1] = X86_64_SSEUP_CLASS;
3790 return 2;
3791 case DImode:
3792 case SImode:
3793 case HImode:
3794 case QImode:
3795 case CSImode:
3796 case CHImode:
3797 case CQImode:
3798 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3799 classes[0] = X86_64_INTEGERSI_CLASS;
3800 else
3801 classes[0] = X86_64_INTEGER_CLASS;
3802 return 1;
3803 case CDImode:
3804 case TImode:
3805 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3806 return 2;
3807 case CTImode:
3808 return 0;
3809 case SFmode:
3810 if (!(bit_offset % 64))
3811 classes[0] = X86_64_SSESF_CLASS;
3812 else
3813 classes[0] = X86_64_SSE_CLASS;
3814 return 1;
3815 case DFmode:
3816 classes[0] = X86_64_SSEDF_CLASS;
3817 return 1;
3818 case XFmode:
3819 classes[0] = X86_64_X87_CLASS;
3820 classes[1] = X86_64_X87UP_CLASS;
3821 return 2;
3822 case TFmode:
3823 classes[0] = X86_64_SSE_CLASS;
3824 classes[1] = X86_64_SSEUP_CLASS;
3825 return 2;
3826 case SCmode:
3827 classes[0] = X86_64_SSE_CLASS;
3828 return 1;
3829 case DCmode:
3830 classes[0] = X86_64_SSEDF_CLASS;
3831 classes[1] = X86_64_SSEDF_CLASS;
3832 return 2;
3833 case XCmode:
3834 classes[0] = X86_64_COMPLEX_X87_CLASS;
3835 return 1;
3836 case TCmode:
3837 /* This mode is larger than 16 bytes. */
3838 return 0;
3839 case V4SFmode:
3840 case V4SImode:
3841 case V16QImode:
3842 case V8HImode:
3843 case V2DFmode:
3844 case V2DImode:
3845 classes[0] = X86_64_SSE_CLASS;
3846 classes[1] = X86_64_SSEUP_CLASS;
3847 return 2;
3848 case V2SFmode:
3849 case V2SImode:
3850 case V4HImode:
3851 case V8QImode:
3852 classes[0] = X86_64_SSE_CLASS;
3853 return 1;
3854 case BLKmode:
3855 case VOIDmode:
3856 return 0;
3857 default:
3858 gcc_assert (VECTOR_MODE_P (mode));
3859
3860 if (bytes > 16)
3861 return 0;
3862
3863 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3864
3865 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3866 classes[0] = X86_64_INTEGERSI_CLASS;
3867 else
3868 classes[0] = X86_64_INTEGER_CLASS;
3869 classes[1] = X86_64_INTEGER_CLASS;
3870 return 1 + (bytes > 8);
3871 }
3872 }
3873
3874 /* Examine the argument and set the number of registers required in each
3875 class. Return 0 iff the parameter should be passed in memory. */
3876 static int
3877 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3878 int *int_nregs, int *sse_nregs)
3879 {
3880 enum x86_64_reg_class regclass[MAX_CLASSES];
3881 int n = classify_argument (mode, type, regclass, 0);
3882
3883 *int_nregs = 0;
3884 *sse_nregs = 0;
3885 if (!n)
3886 return 0;
3887 for (n--; n >= 0; n--)
3888 switch (regclass[n])
3889 {
3890 case X86_64_INTEGER_CLASS:
3891 case X86_64_INTEGERSI_CLASS:
3892 (*int_nregs)++;
3893 break;
3894 case X86_64_SSE_CLASS:
3895 case X86_64_SSESF_CLASS:
3896 case X86_64_SSEDF_CLASS:
3897 (*sse_nregs)++;
3898 break;
3899 case X86_64_NO_CLASS:
3900 case X86_64_SSEUP_CLASS:
3901 break;
3902 case X86_64_X87_CLASS:
3903 case X86_64_X87UP_CLASS:
3904 if (!in_return)
3905 return 0;
3906 break;
3907 case X86_64_COMPLEX_X87_CLASS:
3908 return in_return ? 2 : 0;
3909 case X86_64_MEMORY_CLASS:
3910 gcc_unreachable ();
3911 }
3912 return 1;
3913 }
3914
3915 /* Construct container for the argument used by GCC interface. See
3916 FUNCTION_ARG for the detailed description. */
3917
3918 static rtx
3919 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3920 const_tree type, int in_return, int nintregs, int nsseregs,
3921 const int *intreg, int sse_regno)
3922 {
3923 /* The following variables hold the static issued_error state. */
3924 static bool issued_sse_arg_error;
3925 static bool issued_sse_ret_error;
3926 static bool issued_x87_ret_error;
3927
3928 enum machine_mode tmpmode;
3929 int bytes =
3930 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3931 enum x86_64_reg_class regclass[MAX_CLASSES];
3932 int n;
3933 int i;
3934 int nexps = 0;
3935 int needed_sseregs, needed_intregs;
3936 rtx exp[MAX_CLASSES];
3937 rtx ret;
3938
3939 n = classify_argument (mode, type, regclass, 0);
3940 if (!n)
3941 return NULL;
3942 if (!examine_argument (mode, type, in_return, &needed_intregs,
3943 &needed_sseregs))
3944 return NULL;
3945 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3946 return NULL;
3947
3948 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3949 some less clueful developer tries to use floating-point anyway. */
3950 if (needed_sseregs && !TARGET_SSE)
3951 {
3952 if (in_return)
3953 {
3954 if (!issued_sse_ret_error)
3955 {
3956 error ("SSE register return with SSE disabled");
3957 issued_sse_ret_error = true;
3958 }
3959 }
3960 else if (!issued_sse_arg_error)
3961 {
3962 error ("SSE register argument with SSE disabled");
3963 issued_sse_arg_error = true;
3964 }
3965 return NULL;
3966 }
3967
3968 /* Likewise, error if the ABI requires us to return values in the
3969 x87 registers and the user specified -mno-80387. */
3970 if (!TARGET_80387 && in_return)
3971 for (i = 0; i < n; i++)
3972 if (regclass[i] == X86_64_X87_CLASS
3973 || regclass[i] == X86_64_X87UP_CLASS
3974 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3975 {
3976 if (!issued_x87_ret_error)
3977 {
3978 error ("x87 register return with x87 disabled");
3979 issued_x87_ret_error = true;
3980 }
3981 return NULL;
3982 }
3983
3984 /* First construct simple cases. Avoid SCmode, since we want to use
3985 a single register to pass this type. */
3986 if (n == 1 && mode != SCmode)
3987 switch (regclass[0])
3988 {
3989 case X86_64_INTEGER_CLASS:
3990 case X86_64_INTEGERSI_CLASS:
3991 return gen_rtx_REG (mode, intreg[0]);
3992 case X86_64_SSE_CLASS:
3993 case X86_64_SSESF_CLASS:
3994 case X86_64_SSEDF_CLASS:
3995 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3996 case X86_64_X87_CLASS:
3997 case X86_64_COMPLEX_X87_CLASS:
3998 return gen_rtx_REG (mode, FIRST_STACK_REG);
3999 case X86_64_NO_CLASS:
4000 /* Zero sized array, struct or class. */
4001 return NULL;
4002 default:
4003 gcc_unreachable ();
4004 }
4005 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4006 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4007 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4008
4009 if (n == 2
4010 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4011 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4012 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4013 && regclass[1] == X86_64_INTEGER_CLASS
4014 && (mode == CDImode || mode == TImode || mode == TFmode)
4015 && intreg[0] + 1 == intreg[1])
4016 return gen_rtx_REG (mode, intreg[0]);
4017
4018 /* Otherwise figure out the entries of the PARALLEL. */
4019 for (i = 0; i < n; i++)
4020 {
4021 switch (regclass[i])
4022 {
4023 case X86_64_NO_CLASS:
4024 break;
4025 case X86_64_INTEGER_CLASS:
4026 case X86_64_INTEGERSI_CLASS:
4027 /* Merge TImodes on aligned occasions here too. */
4028 if (i * 8 + 8 > bytes)
4029 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4030 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4031 tmpmode = SImode;
4032 else
4033 tmpmode = DImode;
4034 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
4035 if (tmpmode == BLKmode)
4036 tmpmode = DImode;
4037 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4038 gen_rtx_REG (tmpmode, *intreg),
4039 GEN_INT (i*8));
4040 intreg++;
4041 break;
4042 case X86_64_SSESF_CLASS:
4043 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4044 gen_rtx_REG (SFmode,
4045 SSE_REGNO (sse_regno)),
4046 GEN_INT (i*8));
4047 sse_regno++;
4048 break;
4049 case X86_64_SSEDF_CLASS:
4050 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4051 gen_rtx_REG (DFmode,
4052 SSE_REGNO (sse_regno)),
4053 GEN_INT (i*8));
4054 sse_regno++;
4055 break;
4056 case X86_64_SSE_CLASS:
4057 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4058 tmpmode = TImode;
4059 else
4060 tmpmode = DImode;
4061 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4062 gen_rtx_REG (tmpmode,
4063 SSE_REGNO (sse_regno)),
4064 GEN_INT (i*8));
4065 if (tmpmode == TImode)
4066 i++;
4067 sse_regno++;
4068 break;
4069 default:
4070 gcc_unreachable ();
4071 }
4072 }
4073
4074 /* Empty aligned struct, union or class. */
4075 if (nexps == 0)
4076 return NULL;
4077
4078 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4079 for (i = 0; i < nexps; i++)
4080 XVECEXP (ret, 0, i) = exp [i];
4081 return ret;
4082 }
4083
4084 /* Update the data in CUM to advance over an argument of mode MODE
4085 and data type TYPE. (TYPE is null for libcalls where that information
4086 may not be available.) */
4087
4088 static void
4089 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4090 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4091 {
4092 switch (mode)
4093 {
4094 default:
4095 break;
4096
4097 case BLKmode:
4098 if (bytes < 0)
4099 break;
4100 /* FALLTHRU */
4101
4102 case DImode:
4103 case SImode:
4104 case HImode:
4105 case QImode:
4106 cum->words += words;
4107 cum->nregs -= words;
4108 cum->regno += words;
4109
4110 if (cum->nregs <= 0)
4111 {
4112 cum->nregs = 0;
4113 cum->regno = 0;
4114 }
4115 break;
4116
4117 case DFmode:
4118 if (cum->float_in_sse < 2)
4119 break;
4120 case SFmode:
4121 if (cum->float_in_sse < 1)
4122 break;
4123 /* FALLTHRU */
4124
4125 case TImode:
4126 case V16QImode:
4127 case V8HImode:
4128 case V4SImode:
4129 case V2DImode:
4130 case V4SFmode:
4131 case V2DFmode:
4132 if (!type || !AGGREGATE_TYPE_P (type))
4133 {
4134 cum->sse_words += words;
4135 cum->sse_nregs -= 1;
4136 cum->sse_regno += 1;
4137 if (cum->sse_nregs <= 0)
4138 {
4139 cum->sse_nregs = 0;
4140 cum->sse_regno = 0;
4141 }
4142 }
4143 break;
4144
4145 case V8QImode:
4146 case V4HImode:
4147 case V2SImode:
4148 case V2SFmode:
4149 if (!type || !AGGREGATE_TYPE_P (type))
4150 {
4151 cum->mmx_words += words;
4152 cum->mmx_nregs -= 1;
4153 cum->mmx_regno += 1;
4154 if (cum->mmx_nregs <= 0)
4155 {
4156 cum->mmx_nregs = 0;
4157 cum->mmx_regno = 0;
4158 }
4159 }
4160 break;
4161 }
4162 }
4163
4164 static void
4165 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4166 tree type, HOST_WIDE_INT words)
4167 {
4168 int int_nregs, sse_nregs;
4169
4170 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4171 cum->words += words;
4172 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4173 {
4174 cum->nregs -= int_nregs;
4175 cum->sse_nregs -= sse_nregs;
4176 cum->regno += int_nregs;
4177 cum->sse_regno += sse_nregs;
4178 }
4179 else
4180 cum->words += words;
4181 }
4182
4183 static void
4184 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4185 HOST_WIDE_INT words)
4186 {
4187 /* Otherwise, this should be passed indirect. */
4188 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4189
4190 cum->words += words;
4191 if (cum->nregs > 0)
4192 {
4193 cum->nregs -= 1;
4194 cum->regno += 1;
4195 }
4196 }
4197
4198 void
4199 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4200 tree type, int named ATTRIBUTE_UNUSED)
4201 {
4202 HOST_WIDE_INT bytes, words;
4203
4204 if (mode == BLKmode)
4205 bytes = int_size_in_bytes (type);
4206 else
4207 bytes = GET_MODE_SIZE (mode);
4208 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4209
4210 if (type)
4211 mode = type_natural_mode (type);
4212
4213 if (TARGET_64BIT_MS_ABI)
4214 function_arg_advance_ms_64 (cum, bytes, words);
4215 else if (TARGET_64BIT)
4216 function_arg_advance_64 (cum, mode, type, words);
4217 else
4218 function_arg_advance_32 (cum, mode, type, bytes, words);
4219 }
4220
4221 /* Define where to put the arguments to a function.
4222 Value is zero to push the argument on the stack,
4223 or a hard register in which to store the argument.
4224
4225 MODE is the argument's machine mode.
4226 TYPE is the data type of the argument (as a tree).
4227 This is null for libcalls where that information may
4228 not be available.
4229 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4230 the preceding args and about the function being called.
4231 NAMED is nonzero if this argument is a named parameter
4232 (otherwise it is an extra parameter matching an ellipsis). */
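/* As an example for the 32-bit fastcall handling below (illustrative):

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   passes a in %ecx, b in %edx and c on the stack, while DImode and
   aggregate arguments never use the fastcall registers and always go on
   the stack.  */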
4233
4234 static rtx
4235 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4236 enum machine_mode orig_mode, tree type,
4237 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4238 {
4239 static bool warnedsse, warnedmmx;
4240
4241 /* Avoid the AL settings for the Unix64 ABI. */
4242 if (mode == VOIDmode)
4243 return constm1_rtx;
4244
4245 switch (mode)
4246 {
4247 default:
4248 break;
4249
4250 case BLKmode:
4251 if (bytes < 0)
4252 break;
4253 /* FALLTHRU */
4254 case DImode:
4255 case SImode:
4256 case HImode:
4257 case QImode:
4258 if (words <= cum->nregs)
4259 {
4260 int regno = cum->regno;
4261
4262 /* Fastcall allocates the first two DWORD (SImode) or
4263 smaller arguments to ECX and EDX if the argument
4264 isn't an aggregate type. */
4265 if (cum->fastcall)
4266 {
4267 if (mode == BLKmode
4268 || mode == DImode
4269 || (type && AGGREGATE_TYPE_P (type)))
4270 break;
4271
4272 /* ECX not EAX is the first allocated register. */
4273 if (regno == AX_REG)
4274 regno = CX_REG;
4275 }
4276 return gen_rtx_REG (mode, regno);
4277 }
4278 break;
4279
4280 case DFmode:
4281 if (cum->float_in_sse < 2)
4282 break;
4283 case SFmode:
4284 if (cum->float_in_sse < 1)
4285 break;
4286 /* FALLTHRU */
4287 case TImode:
4288 case V16QImode:
4289 case V8HImode:
4290 case V4SImode:
4291 case V2DImode:
4292 case V4SFmode:
4293 case V2DFmode:
4294 if (!type || !AGGREGATE_TYPE_P (type))
4295 {
4296 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4297 {
4298 warnedsse = true;
4299 warning (0, "SSE vector argument without SSE enabled "
4300 "changes the ABI");
4301 }
4302 if (cum->sse_nregs)
4303 return gen_reg_or_parallel (mode, orig_mode,
4304 cum->sse_regno + FIRST_SSE_REG);
4305 }
4306 break;
4307
4308 case V8QImode:
4309 case V4HImode:
4310 case V2SImode:
4311 case V2SFmode:
4312 if (!type || !AGGREGATE_TYPE_P (type))
4313 {
4314 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4315 {
4316 warnedmmx = true;
4317 warning (0, "MMX vector argument without MMX enabled "
4318 "changes the ABI");
4319 }
4320 if (cum->mmx_nregs)
4321 return gen_reg_or_parallel (mode, orig_mode,
4322 cum->mmx_regno + FIRST_MMX_REG);
4323 }
4324 break;
4325 }
4326
4327 return NULL_RTX;
4328 }
4329
4330 static rtx
4331 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4332 enum machine_mode orig_mode, tree type)
4333 {
4334 /* Handle a hidden AL argument containing number of registers
4335 for varargs x86-64 functions. */
4336 if (mode == VOIDmode)
4337 return GEN_INT (cum->maybe_vaarg
4338 ? (cum->sse_nregs < 0
4339 ? SSE_REGPARM_MAX
4340 : cum->sse_regno)
4341 : -1);
4342
4343 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4344 cum->sse_nregs,
4345 &x86_64_int_parameter_registers [cum->regno],
4346 cum->sse_regno);
4347 }
4348
4349 static rtx
4350 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4351 enum machine_mode orig_mode, int named)
4352 {
4353 unsigned int regno;
4354
4355 /* Avoid the AL settings for the Unix64 ABI. */
4356 if (mode == VOIDmode)
4357 return constm1_rtx;
4358
4359 /* If we've run out of registers, it goes on the stack. */
4360 if (cum->nregs == 0)
4361 return NULL_RTX;
4362
4363 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4364
4365 /* Only floating point modes are passed in anything but integer regs. */
4366 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4367 {
4368 if (named)
4369 regno = cum->regno + FIRST_SSE_REG;
4370 else
4371 {
4372 rtx t1, t2;
4373
4374 /* Unnamed floating parameters are passed in both the
4375 SSE and integer registers. */
4376 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4377 t2 = gen_rtx_REG (mode, regno);
4378 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4379 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4380 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4381 }
4382 }
4383
4384 return gen_reg_or_parallel (mode, orig_mode, regno);
4385 }
4386
4387 rtx
4388 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4389 tree type, int named)
4390 {
4391 enum machine_mode mode = omode;
4392 HOST_WIDE_INT bytes, words;
4393
4394 if (mode == BLKmode)
4395 bytes = int_size_in_bytes (type);
4396 else
4397 bytes = GET_MODE_SIZE (mode);
4398 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4399
4400 /* To simplify the code below, represent vector types with a vector mode
4401 even if MMX/SSE are not active. */
4402 if (type && TREE_CODE (type) == VECTOR_TYPE)
4403 mode = type_natural_mode (type);
4404
4405 if (TARGET_64BIT_MS_ABI)
4406 return function_arg_ms_64 (cum, mode, omode, named);
4407 else if (TARGET_64BIT)
4408 return function_arg_64 (cum, mode, omode, type);
4409 else
4410 return function_arg_32 (cum, mode, omode, type, bytes, words);
4411 }
4412
4413 /* A C expression that indicates when an argument must be passed by
4414 reference. If nonzero for an argument, a copy of that argument is
4415 made in memory and a pointer to the argument is passed instead of
4416 the argument itself. The pointer is passed in whatever way is
4417 appropriate for passing a pointer to that type. */
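/* Illustrative examples for the 64-bit MS ABI case handled first below:

       struct small { int a, b; };         8 bytes, passed by value
       struct big { int a, b, c; };       12 bytes, passed by reference

   Aggregates are passed by value only when their size is exactly 1, 2, 4
   or 8 bytes; arrays and anything wider than 8 bytes (including __m128)
   are passed by reference.  */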
4418
4419 static bool
4420 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4421 enum machine_mode mode ATTRIBUTE_UNUSED,
4422 const_tree type, bool named ATTRIBUTE_UNUSED)
4423 {
4424 if (TARGET_64BIT_MS_ABI)
4425 {
4426 if (type)
4427 {
4428 /* Arrays are passed by reference. */
4429 if (TREE_CODE (type) == ARRAY_TYPE)
4430 return true;
4431
4432 if (AGGREGATE_TYPE_P (type))
4433 {
4434 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4435 are passed by reference. */
4436 int el2 = exact_log2 (int_size_in_bytes (type));
4437 return !(el2 >= 0 && el2 <= 3);
4438 }
4439 }
4440
4441 /* __m128 is passed by reference. */
4442 /* ??? How to handle complex? For now treat them as structs,
4443 and pass them by reference if they're too large. */
4444 if (GET_MODE_SIZE (mode) > 8)
4445 return true;
4446 }
4447 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4448 return 1;
4449
4450 return 0;
4451 }
4452
4453 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4454 ABI. Only called if TARGET_SSE. */
4455 static bool
4456 contains_128bit_aligned_vector_p (tree type)
4457 {
4458 enum machine_mode mode = TYPE_MODE (type);
4459 if (SSE_REG_MODE_P (mode)
4460 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4461 return true;
4462 if (TYPE_ALIGN (type) < 128)
4463 return false;
4464
4465 if (AGGREGATE_TYPE_P (type))
4466 {
4467 /* Walk the aggregates recursively. */
4468 switch (TREE_CODE (type))
4469 {
4470 case RECORD_TYPE:
4471 case UNION_TYPE:
4472 case QUAL_UNION_TYPE:
4473 {
4474 tree field;
4475
4476 /* Walk all the structure fields. */
4477 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4478 {
4479 if (TREE_CODE (field) == FIELD_DECL
4480 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4481 return true;
4482 }
4483 break;
4484 }
4485
4486 case ARRAY_TYPE:
4487 /* Just for use if some language passes arrays by value. */
4488 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4489 return true;
4490 break;
4491
4492 default:
4493 gcc_unreachable ();
4494 }
4495 }
4496 return false;
4497 }
4498
4499 /* Gives the alignment boundary, in bits, of an argument with the
4500 specified mode and type. */
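/* Example (illustrative): in 32-bit mode an int argument gets the default
   PARM_BOUNDARY alignment, whereas a by-value

       typedef float v4sf __attribute__ ((vector_size (16)));

   argument gets 128-bit alignment when SSE is enabled; the result is
   capped at 128 bits in all cases.  */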
4501
4502 int
4503 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4504 {
4505 int align;
4506 if (type)
4507 align = TYPE_ALIGN (type);
4508 else
4509 align = GET_MODE_ALIGNMENT (mode);
4510 if (align < PARM_BOUNDARY)
4511 align = PARM_BOUNDARY;
4512 if (!TARGET_64BIT)
4513 {
4514 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4515 make an exception for SSE modes since these require 128bit
4516 alignment.
4517
4518 The handling here differs from field_alignment. ICC aligns MMX
4519 arguments to 4 byte boundaries, while structure fields are aligned
4520 to 8 byte boundaries. */
4521 if (!TARGET_SSE)
4522 align = PARM_BOUNDARY;
4523 else if (!type)
4524 {
4525 if (!SSE_REG_MODE_P (mode))
4526 align = PARM_BOUNDARY;
4527 }
4528 else
4529 {
4530 if (!contains_128bit_aligned_vector_p (type))
4531 align = PARM_BOUNDARY;
4532 }
4533 }
4534 if (align > 128)
4535 align = 128;
4536 return align;
4537 }
4538
4539 /* Return true if N is a possible register number of function value. */
4540
4541 bool
4542 ix86_function_value_regno_p (int regno)
4543 {
4544 switch (regno)
4545 {
4546 case 0:
4547 return true;
4548
4549 case FIRST_FLOAT_REG:
4550 if (TARGET_64BIT_MS_ABI)
4551 return false;
4552 return TARGET_FLOAT_RETURNS_IN_80387;
4553
4554 case FIRST_SSE_REG:
4555 return TARGET_SSE;
4556
4557 case FIRST_MMX_REG:
4558 if (TARGET_MACHO || TARGET_64BIT)
4559 return false;
4560 return TARGET_MMX;
4561 }
4562
4563 return false;
4564 }
4565
4566 /* Define how to find the value returned by a function.
4567 VALTYPE is the data type of the value (as a tree).
4568 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4569 otherwise, FUNC is 0. */
4570
4571 static rtx
4572 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4573 const_tree fntype, const_tree fn)
4574 {
4575 unsigned int regno;
4576
4577 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4578 we normally prevent this case when mmx is not available. However
4579 some ABIs may require the result to be returned like DImode. */
4580 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4581 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4582
4583 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4584 we prevent this case when sse is not available. However some ABIs
4585 may require the result to be returned like integer TImode. */
4586 else if (mode == TImode
4587 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4588 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4589
4590 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4591 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4592 regno = FIRST_FLOAT_REG;
4593 else
4594 /* Most things go in %eax. */
4595 regno = AX_REG;
4596
4597 /* Override FP return register with %xmm0 for local functions when
4598 SSE math is enabled or for functions with sseregparm attribute. */
4599 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4600 {
4601 int sse_level = ix86_function_sseregparm (fntype, fn);
4602 if ((sse_level >= 1 && mode == SFmode)
4603 || (sse_level == 2 && mode == DFmode))
4604 regno = FIRST_SSE_REG;
4605 }
4606
4607 return gen_rtx_REG (orig_mode, regno);
4608 }
4609
4610 static rtx
4611 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4612 const_tree valtype)
4613 {
4614 rtx ret;
4615
4616 /* Handle libcalls, which don't provide a type node. */
4617 if (valtype == NULL)
4618 {
4619 switch (mode)
4620 {
4621 case SFmode:
4622 case SCmode:
4623 case DFmode:
4624 case DCmode:
4625 case TFmode:
4626 case SDmode:
4627 case DDmode:
4628 case TDmode:
4629 return gen_rtx_REG (mode, FIRST_SSE_REG);
4630 case XFmode:
4631 case XCmode:
4632 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4633 case TCmode:
4634 return NULL;
4635 default:
4636 return gen_rtx_REG (mode, AX_REG);
4637 }
4638 }
4639
4640 ret = construct_container (mode, orig_mode, valtype, 1,
4641 REGPARM_MAX, SSE_REGPARM_MAX,
4642 x86_64_int_return_registers, 0);
4643
4644 /* For zero sized structures, construct_container returns NULL, but we
4645 need to keep the rest of the compiler happy by returning a meaningful value. */
4646 if (!ret)
4647 ret = gen_rtx_REG (orig_mode, AX_REG);
4648
4649 return ret;
4650 }
4651
4652 static rtx
4653 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4654 {
4655 unsigned int regno = AX_REG;
4656
4657 if (TARGET_SSE)
4658 {
4659 if (mode == SFmode || mode == DFmode)
4660 regno = FIRST_SSE_REG;
4661 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4662 regno = FIRST_SSE_REG;
4663 }
4664
4665 return gen_rtx_REG (orig_mode, regno);
4666 }
4667
4668 static rtx
4669 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4670 enum machine_mode orig_mode, enum machine_mode mode)
4671 {
4672 const_tree fn, fntype;
4673
4674 fn = NULL_TREE;
4675 if (fntype_or_decl && DECL_P (fntype_or_decl))
4676 fn = fntype_or_decl;
4677 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4678
4679 if (TARGET_64BIT_MS_ABI)
4680 return function_value_ms_64 (orig_mode, mode);
4681 else if (TARGET_64BIT)
4682 return function_value_64 (orig_mode, mode, valtype);
4683 else
4684 return function_value_32 (orig_mode, mode, fntype, fn);
4685 }
4686
4687 static rtx
4688 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4689 bool outgoing ATTRIBUTE_UNUSED)
4690 {
4691 enum machine_mode mode, orig_mode;
4692
4693 orig_mode = TYPE_MODE (valtype);
4694 mode = type_natural_mode (valtype);
4695 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4696 }
4697
4698 rtx
4699 ix86_libcall_value (enum machine_mode mode)
4700 {
4701 return ix86_function_value_1 (NULL, NULL, mode, mode);
4702 }
4703
4704 /* Return true iff type is returned in memory. */
4705
4706 static int
4707 return_in_memory_32 (const_tree type, enum machine_mode mode)
4708 {
4709 HOST_WIDE_INT size;
4710
4711 if (mode == BLKmode)
4712 return 1;
4713
4714 size = int_size_in_bytes (type);
4715
4716 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4717 return 0;
4718
4719 if (VECTOR_MODE_P (mode) || mode == TImode)
4720 {
4721 /* User-created vectors small enough to fit in EAX. */
4722 if (size < 8)
4723 return 0;
4724
4725 /* MMX/3dNow values are returned in MM0,
4726 except when it doesn't exist. */
4727 if (size == 8)
4728 return (TARGET_MMX ? 0 : 1);
4729
4730 /* SSE values are returned in XMM0, except when it doesn't exist. */
4731 if (size == 16)
4732 return (TARGET_SSE ? 0 : 1);
4733 }
4734
4735 if (mode == XFmode)
4736 return 0;
4737
4738 if (mode == TDmode)
4739 return 1;
4740
4741 if (size > 12)
4742 return 1;
4743 return 0;
4744 }
4745
4746 static int
4747 return_in_memory_64 (const_tree type, enum machine_mode mode)
4748 {
4749 int needed_intregs, needed_sseregs;
4750 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4751 }
4752
4753 static int
4754 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4755 {
4756 HOST_WIDE_INT size = int_size_in_bytes (type);
4757
4758 /* __m128 and friends are returned in xmm0. */
4759 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4760 return 0;
4761
4762 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes, but not complex. */
4763 return (size != 1 && size != 2 && size != 4 && size != 8)
4764 || COMPLEX_MODE_P (mode);
4765 }
4766
4767 int
4768 ix86_return_in_memory (const_tree type)
4769 {
4770 const enum machine_mode mode = type_natural_mode (type);
4771
4772 if (TARGET_64BIT_MS_ABI)
4773 return return_in_memory_ms_64 (type, mode);
4774 else if (TARGET_64BIT)
4775 return return_in_memory_64 (type, mode);
4776 else
4777 return return_in_memory_32 (type, mode);
4778 }
4779
4780 /* Return false iff TYPE is returned in memory. This version is used
4781 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4782 but differs notably in that when MMX is available, 8-byte vectors
4783 are returned in memory, rather than in MMX registers. */
4784
4785 int
4786 ix86_sol10_return_in_memory (const_tree type)
4787 {
4788 int size;
4789 enum machine_mode mode = type_natural_mode (type);
4790
4791 if (TARGET_64BIT)
4792 return return_in_memory_64 (type, mode);
4793
4794 if (mode == BLKmode)
4795 return 1;
4796
4797 size = int_size_in_bytes (type);
4798
4799 if (VECTOR_MODE_P (mode))
4800 {
4801 /* Return in memory only if MMX registers *are* available. This
4802 seems backwards, but it is consistent with the existing
4803 Solaris x86 ABI. */
4804 if (size == 8)
4805 return TARGET_MMX;
4806 if (size == 16)
4807 return !TARGET_SSE;
4808 }
4809 else if (mode == TImode)
4810 return !TARGET_SSE;
4811 else if (mode == XFmode)
4812 return 0;
4813
4814 return size > 12;
4815 }
4816
4817 /* When returning SSE vector types, we have a choice of either
4818 (1) being ABI incompatible with a -march switch, or
4819 (2) generating an error.
4820 Given no good solution, I think the safest thing is one warning.
4821 The user won't be able to use -Werror, but....
4822
4823 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4824 called in response to actually generating a caller or callee that
4825 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4826 via aggregate_value_p for general type probing from tree-ssa. */
4827
4828 static rtx
4829 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4830 {
4831 static bool warnedsse, warnedmmx;
4832
4833 if (!TARGET_64BIT && type)
4834 {
4835 /* Look at the return type of the function, not the function type. */
4836 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4837
4838 if (!TARGET_SSE && !warnedsse)
4839 {
4840 if (mode == TImode
4841 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4842 {
4843 warnedsse = true;
4844 warning (0, "SSE vector return without SSE enabled "
4845 "changes the ABI");
4846 }
4847 }
4848
4849 if (!TARGET_MMX && !warnedmmx)
4850 {
4851 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4852 {
4853 warnedmmx = true;
4854 warning (0, "MMX vector return without MMX enabled "
4855 "changes the ABI");
4856 }
4857 }
4858 }
4859
4860 return NULL;
4861 }
4862
4863 \f
4864 /* Create the va_list data type. */
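/* On 64-bit targets the record built below corresponds to the layout
   described in the x86-64 psABI; roughly (a sketch, field names as used
   below):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   gp_offset and fp_offset record how much of the register save area has
   been consumed, overflow_arg_area points at the stack arguments, and
   reg_save_area points at the block filled in by setup_incoming_varargs_64.  */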
4865
4866 static tree
4867 ix86_build_builtin_va_list (void)
4868 {
4869 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4870
4871 /* For i386 we use plain pointer to argument area. */
4872 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4873 return build_pointer_type (char_type_node);
4874
4875 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4876 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4877
4878 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4879 unsigned_type_node);
4880 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4881 unsigned_type_node);
4882 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4883 ptr_type_node);
4884 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4885 ptr_type_node);
4886
4887 va_list_gpr_counter_field = f_gpr;
4888 va_list_fpr_counter_field = f_fpr;
4889
4890 DECL_FIELD_CONTEXT (f_gpr) = record;
4891 DECL_FIELD_CONTEXT (f_fpr) = record;
4892 DECL_FIELD_CONTEXT (f_ovf) = record;
4893 DECL_FIELD_CONTEXT (f_sav) = record;
4894
4895 TREE_CHAIN (record) = type_decl;
4896 TYPE_NAME (record) = type_decl;
4897 TYPE_FIELDS (record) = f_gpr;
4898 TREE_CHAIN (f_gpr) = f_fpr;
4899 TREE_CHAIN (f_fpr) = f_ovf;
4900 TREE_CHAIN (f_ovf) = f_sav;
4901
4902 layout_type (record);
4903
4904 /* The correct type is an array type of one element. */
4905 return build_array_type (record, build_index_type (size_zero_node));
4906 }
4907
4908 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4909
4910 static void
4911 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4912 {
4913 rtx save_area, mem;
4914 rtx label;
4915 rtx label_ref;
4916 rtx tmp_reg;
4917 rtx nsse_reg;
4918 alias_set_type set;
4919 int i;
4920
4921 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4922 return;
4923
4924 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4925 ix86_save_varrargs_registers = 1;
4926 /* We need 16-byte stack alignment to save SSE registers. If user
4927 asked for a lower preferred_stack_boundary, let's just hope that they know
4928 what they are doing and won't pass SSE values through varargs.
4929
4930 We may also end up assuming that only 64-bit values are stored in SSE
4931 registers, to let some floating point programs work. */
4932 if (ix86_preferred_stack_boundary >= 128)
4933 cfun->stack_alignment_needed = 128;
4934
4935 save_area = frame_pointer_rtx;
4936 set = get_varargs_alias_set ();
4937
4938 for (i = cum->regno;
4939 i < ix86_regparm
4940 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4941 i++)
4942 {
4943 mem = gen_rtx_MEM (Pmode,
4944 plus_constant (save_area, i * UNITS_PER_WORD));
4945 MEM_NOTRAP_P (mem) = 1;
4946 set_mem_alias_set (mem, set);
4947 emit_move_insn (mem, gen_rtx_REG (Pmode,
4948 x86_64_int_parameter_registers[i]));
4949 }
4950
4951 if (cum->sse_nregs && cfun->va_list_fpr_size)
4952 {
4953 /* Now emit code to save SSE registers. The AX parameter contains the
4954 number of SSE parameter registers used to call this function. We use
4955 the sse_prologue_save insn template, which produces a computed jump
4956 across the SSE saves. We need some preparation work to get this working. */
4957
4958 label = gen_label_rtx ();
4959 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4960
4961 /* Compute the address to jump to:
4962 label - eax*4 + nnamed_sse_arguments*4 */
4963 tmp_reg = gen_reg_rtx (Pmode);
4964 nsse_reg = gen_reg_rtx (Pmode);
4965 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
4966 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4967 gen_rtx_MULT (Pmode, nsse_reg,
4968 GEN_INT (4))));
4969 if (cum->sse_regno)
4970 emit_move_insn
4971 (nsse_reg,
4972 gen_rtx_CONST (DImode,
4973 gen_rtx_PLUS (DImode,
4974 label_ref,
4975 GEN_INT (cum->sse_regno * 4))));
4976 else
4977 emit_move_insn (nsse_reg, label_ref);
4978 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4979
4980 /* Compute the address of the memory block we save into. We always use a
4981 pointer pointing 127 bytes after the first byte to store - this is
4982 needed to keep the instruction size limited to 4 bytes. */
4983 tmp_reg = gen_reg_rtx (Pmode);
4984 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4985 plus_constant (save_area,
4986 8 * REGPARM_MAX + 127)));
4987 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4988 MEM_NOTRAP_P (mem) = 1;
4989 set_mem_alias_set (mem, set);
4990 set_mem_align (mem, BITS_PER_WORD);
4991
4992 /* And finally do the dirty job! */
4993 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4994 GEN_INT (cum->sse_regno), label));
4995 }
4996 }
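/* A rough sketch of the varargs register save area laid out by the code
   above, assuming the usual SysV x86-64 values REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8 (the exact offsets are the ABI's, not promised by
   this comment):

       save_area +   0 .. +  47   six integer argument registers, 8 bytes each
       save_area +  48 .. + 175   eight SSE argument registers, 16 bytes each

   ix86_va_start below initializes gp_offset/fp_offset to index into this
   block, which is why fp_offset starts at 8 * REGPARM_MAX.  */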
4997
4998 static void
4999 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5000 {
5001 alias_set_type set = get_varargs_alias_set ();
5002 int i;
5003
5004 for (i = cum->regno; i < REGPARM_MAX; i++)
5005 {
5006 rtx reg, mem;
5007
5008 mem = gen_rtx_MEM (Pmode,
5009 plus_constant (virtual_incoming_args_rtx,
5010 i * UNITS_PER_WORD));
5011 MEM_NOTRAP_P (mem) = 1;
5012 set_mem_alias_set (mem, set);
5013
5014 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5015 emit_move_insn (mem, reg);
5016 }
5017 }
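/* The ms_64 variant above has no separate save area: each still-unnamed
   integer parameter register (rcx, rdx, r8, r9 in the usual Win64
   convention) is spilled back into the caller-provided incoming argument
   area at virtual_incoming_args_rtx + i*8, so the generic std_* va hooks
   can then walk the arguments as ordinary stack slots.  */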
5018
5019 static void
5020 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5021 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5022 int no_rtl)
5023 {
5024 CUMULATIVE_ARGS next_cum;
5025 tree fntype;
5026
5027 /* This argument doesn't appear to be used anymore, which is good,
5028 because the old code here didn't suppress rtl generation. */
5029 gcc_assert (!no_rtl);
5030
5031 if (!TARGET_64BIT)
5032 return;
5033
5034 fntype = TREE_TYPE (current_function_decl);
5035
5036 /* For varargs, we do not want to skip the dummy va_dcl argument.
5037 For stdargs, we do want to skip the last named argument. */
5038 next_cum = *cum;
5039 if (stdarg_p (fntype))
5040 function_arg_advance (&next_cum, mode, type, 1);
5041
5042 if (TARGET_64BIT_MS_ABI)
5043 setup_incoming_varargs_ms_64 (&next_cum);
5044 else
5045 setup_incoming_varargs_64 (&next_cum);
5046 }
5047
5048 /* Implement va_start. */
5049
5050 static void
5051 ix86_va_start (tree valist, rtx nextarg)
5052 {
5053 HOST_WIDE_INT words, n_gpr, n_fpr;
5054 tree f_gpr, f_fpr, f_ovf, f_sav;
5055 tree gpr, fpr, ovf, sav, t;
5056 tree type;
5057
5058 /* Only 64-bit targets need something special. */
5059 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5060 {
5061 std_expand_builtin_va_start (valist, nextarg);
5062 return;
5063 }
5064
5065 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5066 f_fpr = TREE_CHAIN (f_gpr);
5067 f_ovf = TREE_CHAIN (f_fpr);
5068 f_sav = TREE_CHAIN (f_ovf);
5069
5070 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5071 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5072 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5073 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5074 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5075
5076 /* Count number of gp and fp argument registers used. */
5077 words = current_function_args_info.words;
5078 n_gpr = current_function_args_info.regno;
5079 n_fpr = current_function_args_info.sse_regno;
5080
5081 if (cfun->va_list_gpr_size)
5082 {
5083 type = TREE_TYPE (gpr);
5084 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5085 build_int_cst (type, n_gpr * 8));
5086 TREE_SIDE_EFFECTS (t) = 1;
5087 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5088 }
5089
5090 if (cfun->va_list_fpr_size)
5091 {
5092 type = TREE_TYPE (fpr);
5093 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5094 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5095 TREE_SIDE_EFFECTS (t) = 1;
5096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5097 }
5098
5099 /* Find the overflow area. */
5100 type = TREE_TYPE (ovf);
5101 t = make_tree (type, virtual_incoming_args_rtx);
5102 if (words != 0)
5103 t = build2 (POINTER_PLUS_EXPR, type, t,
5104 size_int (words * UNITS_PER_WORD));
5105 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5106 TREE_SIDE_EFFECTS (t) = 1;
5107 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5108
5109 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5110 {
5111 /* Find the register save area.
5112 The function prologue saves it right above the stack frame. */
5113 type = TREE_TYPE (sav);
5114 t = make_tree (type, frame_pointer_rtx);
5115 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5116 TREE_SIDE_EFFECTS (t) = 1;
5117 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5118 }
5119 }
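/* Worked example (a sketch, not taken from the sources): for
       int f (int n, ...)
   on SysV x86-64, one integer register is consumed by the named argument,
   so the code above initializes the va_list roughly as

       gp_offset          = 1 * 8  =  8
       fp_offset          = 0 * 16 + 8 * REGPARM_MAX = 48
       overflow_arg_area  = incoming args + words * UNITS_PER_WORD
       reg_save_area      = the block saved by setup_incoming_varargs_64.  */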
5120
5121 /* Implement va_arg. */
5122
5123 static tree
5124 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5125 {
5126 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5127 tree f_gpr, f_fpr, f_ovf, f_sav;
5128 tree gpr, fpr, ovf, sav, t;
5129 int size, rsize;
5130 tree lab_false, lab_over = NULL_TREE;
5131 tree addr, t2;
5132 rtx container;
5133 int indirect_p = 0;
5134 tree ptrtype;
5135 enum machine_mode nat_mode;
5136
5137 /* Only 64-bit targets need something special. */
5138 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5139 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5140
5141 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5142 f_fpr = TREE_CHAIN (f_gpr);
5143 f_ovf = TREE_CHAIN (f_fpr);
5144 f_sav = TREE_CHAIN (f_ovf);
5145
5146 valist = build_va_arg_indirect_ref (valist);
5147 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5148 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5149 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5150 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5151
5152 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5153 if (indirect_p)
5154 type = build_pointer_type (type);
5155 size = int_size_in_bytes (type);
5156 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5157
5158 nat_mode = type_natural_mode (type);
5159 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5160 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5161
5162 /* Pull the value out of the saved registers. */
5163
5164 addr = create_tmp_var (ptr_type_node, "addr");
5165 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5166
5167 if (container)
5168 {
5169 int needed_intregs, needed_sseregs;
5170 bool need_temp;
5171 tree int_addr, sse_addr;
5172
5173 lab_false = create_artificial_label ();
5174 lab_over = create_artificial_label ();
5175
5176 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5177
5178 need_temp = (!REG_P (container)
5179 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5180 || TYPE_ALIGN (type) > 128));
5181
5182 /* In case we are passing a structure, verify that it is a consecutive
5183 block in the register save area. If not, we need to do moves. */
5184 if (!need_temp && !REG_P (container))
5185 {
5186 /* Verify that all registers are strictly consecutive */
5187 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5188 {
5189 int i;
5190
5191 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5192 {
5193 rtx slot = XVECEXP (container, 0, i);
5194 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5195 || INTVAL (XEXP (slot, 1)) != i * 16)
5196 need_temp = 1;
5197 }
5198 }
5199 else
5200 {
5201 int i;
5202
5203 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5204 {
5205 rtx slot = XVECEXP (container, 0, i);
5206 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5207 || INTVAL (XEXP (slot, 1)) != i * 8)
5208 need_temp = 1;
5209 }
5210 }
5211 }
5212 if (!need_temp)
5213 {
5214 int_addr = addr;
5215 sse_addr = addr;
5216 }
5217 else
5218 {
5219 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5220 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5221 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5222 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5223 }
5224
5225 /* First ensure that we fit completely in registers. */
5226 if (needed_intregs)
5227 {
5228 t = build_int_cst (TREE_TYPE (gpr),
5229 (REGPARM_MAX - needed_intregs + 1) * 8);
5230 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5231 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5232 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5233 gimplify_and_add (t, pre_p);
5234 }
5235 if (needed_sseregs)
5236 {
5237 t = build_int_cst (TREE_TYPE (fpr),
5238 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5239 + REGPARM_MAX * 8);
5240 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5241 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5242 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5243 gimplify_and_add (t, pre_p);
5244 }
5245
5246 /* Compute index to start of area used for integer regs. */
5247 if (needed_intregs)
5248 {
5249 /* int_addr = gpr + sav; */
5250 t = fold_convert (sizetype, gpr);
5251 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5252 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5253 gimplify_and_add (t, pre_p);
5254 }
5255 if (needed_sseregs)
5256 {
5257 /* sse_addr = fpr + sav; */
5258 t = fold_convert (sizetype, fpr);
5259 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5260 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5261 gimplify_and_add (t, pre_p);
5262 }
5263 if (need_temp)
5264 {
5265 int i;
5266 tree temp = create_tmp_var (type, "va_arg_tmp");
5267
5268 /* addr = &temp; */
5269 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5270 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5271 gimplify_and_add (t, pre_p);
5272
5273 for (i = 0; i < XVECLEN (container, 0); i++)
5274 {
5275 rtx slot = XVECEXP (container, 0, i);
5276 rtx reg = XEXP (slot, 0);
5277 enum machine_mode mode = GET_MODE (reg);
5278 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5279 tree addr_type = build_pointer_type (piece_type);
5280 tree src_addr, src;
5281 int src_offset;
5282 tree dest_addr, dest;
5283
5284 if (SSE_REGNO_P (REGNO (reg)))
5285 {
5286 src_addr = sse_addr;
5287 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5288 }
5289 else
5290 {
5291 src_addr = int_addr;
5292 src_offset = REGNO (reg) * 8;
5293 }
5294 src_addr = fold_convert (addr_type, src_addr);
5295 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5296 size_int (src_offset));
5297 src = build_va_arg_indirect_ref (src_addr);
5298
5299 dest_addr = fold_convert (addr_type, addr);
5300 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5301 size_int (INTVAL (XEXP (slot, 1))));
5302 dest = build_va_arg_indirect_ref (dest_addr);
5303
5304 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5305 gimplify_and_add (t, pre_p);
5306 }
5307 }
5308
5309 if (needed_intregs)
5310 {
5311 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5312 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5313 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5314 gimplify_and_add (t, pre_p);
5315 }
5316 if (needed_sseregs)
5317 {
5318 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5319 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5320 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5321 gimplify_and_add (t, pre_p);
5322 }
5323
5324 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5325 gimplify_and_add (t, pre_p);
5326
5327 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5328 append_to_statement_list (t, pre_p);
5329 }
5330
5331 /* ... otherwise out of the overflow area. */
5332
5333 /* Care for on-stack alignment if needed. */
5334 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5335 || integer_zerop (TYPE_SIZE (type)))
5336 t = ovf;
5337 else
5338 {
5339 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5340 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5341 size_int (align - 1));
5342 t = fold_convert (sizetype, t);
5343 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5344 size_int (-align));
5345 t = fold_convert (TREE_TYPE (ovf), t);
5346 }
5347 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5348
5349 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5350 gimplify_and_add (t2, pre_p);
5351
5352 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5353 size_int (rsize * UNITS_PER_WORD));
5354 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5355 gimplify_and_add (t, pre_p);
5356
5357 if (container)
5358 {
5359 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5360 append_to_statement_list (t, pre_p);
5361 }
5362
5363 ptrtype = build_pointer_type (type);
5364 addr = fold_convert (ptrtype, addr);
5365
5366 if (indirect_p)
5367 addr = build_va_arg_indirect_ref (addr);
5368 return build_va_arg_indirect_ref (addr);
5369 }
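/* A small worked example of the overflow-area alignment above (purely
   illustrative): for a type with FUNCTION_ARG_BOUNDARY of 128 bits,
   align == 16, so the code computes

       addr = (ovf + 15) & -16;
       ovf  = addr + rsize * UNITS_PER_WORD;

   i.e. the fetch address is aligned first and the overflow pointer is then
   bumped past the (word-rounded) argument.  */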
5370 \f
5371 /* Return nonzero if OPNUM's MEM should be matched
5372 in movabs* patterns. */
5373
5374 int
5375 ix86_check_movabs (rtx insn, int opnum)
5376 {
5377 rtx set, mem;
5378
5379 set = PATTERN (insn);
5380 if (GET_CODE (set) == PARALLEL)
5381 set = XVECEXP (set, 0, 0);
5382 gcc_assert (GET_CODE (set) == SET);
5383 mem = XEXP (set, opnum);
5384 while (GET_CODE (mem) == SUBREG)
5385 mem = SUBREG_REG (mem);
5386 gcc_assert (MEM_P (mem));
5387 return (volatile_ok || !MEM_VOLATILE_P (mem));
5388 }
5389 \f
5390 /* Initialize the table of extra 80387 mathematical constants. */
5391
5392 static void
5393 init_ext_80387_constants (void)
5394 {
5395 static const char * cst[5] =
5396 {
5397 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5398 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5399 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5400 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5401 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5402 };
5403 int i;
5404
5405 for (i = 0; i < 5; i++)
5406 {
5407 real_from_string (&ext_80387_constants_table[i], cst[i]);
5408 /* Ensure each constant is rounded to XFmode precision. */
5409 real_convert (&ext_80387_constants_table[i],
5410 XFmode, &ext_80387_constants_table[i]);
5411 }
5412
5413 ext_80387_constants_init = 1;
5414 }
5415
5416 /* Return a nonzero code for the special instruction that can load the
5417 constant X, 0 if there is none, or -1 if X is not an 80387 constant. */
5418
5419 int
5420 standard_80387_constant_p (rtx x)
5421 {
5422 enum machine_mode mode = GET_MODE (x);
5423
5424 REAL_VALUE_TYPE r;
5425
5426 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5427 return -1;
5428
5429 if (x == CONST0_RTX (mode))
5430 return 1;
5431 if (x == CONST1_RTX (mode))
5432 return 2;
5433
5434 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5435
5436 /* For XFmode constants, try to find a special 80387 instruction when
5437 optimizing for size or on those CPUs that benefit from them. */
5438 if (mode == XFmode
5439 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5440 {
5441 int i;
5442
5443 if (! ext_80387_constants_init)
5444 init_ext_80387_constants ();
5445
5446 for (i = 0; i < 5; i++)
5447 if (real_identical (&r, &ext_80387_constants_table[i]))
5448 return i + 3;
5449 }
5450
5451 /* A load of the constant -0.0 or -1.0 will be split into an
5452 fldz;fchs or fld1;fchs sequence. */
5453 if (real_isnegzero (&r))
5454 return 8;
5455 if (real_identical (&r, &dconstm1))
5456 return 9;
5457
5458 return 0;
5459 }
5460
5461 /* Return the opcode of the special instruction to be used to load
5462 the constant X. */
5463
5464 const char *
5465 standard_80387_constant_opcode (rtx x)
5466 {
5467 switch (standard_80387_constant_p (x))
5468 {
5469 case 1:
5470 return "fldz";
5471 case 2:
5472 return "fld1";
5473 case 3:
5474 return "fldlg2";
5475 case 4:
5476 return "fldln2";
5477 case 5:
5478 return "fldl2e";
5479 case 6:
5480 return "fldl2t";
5481 case 7:
5482 return "fldpi";
5483 case 8:
5484 case 9:
5485 return "#";
5486 default:
5487 gcc_unreachable ();
5488 }
5489 }
5490
5491 /* Return the CONST_DOUBLE representing the 80387 constant that is
5492 loaded by the specified special instruction. The argument IDX
5493 matches the return value from standard_80387_constant_p. */
5494
5495 rtx
5496 standard_80387_constant_rtx (int idx)
5497 {
5498 int i;
5499
5500 if (! ext_80387_constants_init)
5501 init_ext_80387_constants ();
5502
5503 switch (idx)
5504 {
5505 case 3:
5506 case 4:
5507 case 5:
5508 case 6:
5509 case 7:
5510 i = idx - 3;
5511 break;
5512
5513 default:
5514 gcc_unreachable ();
5515 }
5516
5517 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5518 XFmode);
5519 }
5520
5521 /* Return 1 if MODE is a valid mode for SSE. */
5522 static int
5523 standard_sse_mode_p (enum machine_mode mode)
5524 {
5525 switch (mode)
5526 {
5527 case V16QImode:
5528 case V8HImode:
5529 case V4SImode:
5530 case V2DImode:
5531 case V4SFmode:
5532 case V2DFmode:
5533 return 1;
5534
5535 default:
5536 return 0;
5537 }
5538 }
5539
5540 /* Return 1 if X is an FP constant that we can load into an SSE register
5541 without using memory. */
5542 int
5543 standard_sse_constant_p (rtx x)
5544 {
5545 enum machine_mode mode = GET_MODE (x);
5546
5547 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5548 return 1;
5549 if (vector_all_ones_operand (x, mode)
5550 && standard_sse_mode_p (mode))
5551 return TARGET_SSE2 ? 2 : -1;
5552
5553 return 0;
5554 }
5555
5556 /* Return the opcode of the special instruction to be used to load
5557 the constant X. */
5558
5559 const char *
5560 standard_sse_constant_opcode (rtx insn, rtx x)
5561 {
5562 switch (standard_sse_constant_p (x))
5563 {
5564 case 1:
5565 if (get_attr_mode (insn) == MODE_V4SF)
5566 return "xorps\t%0, %0";
5567 else if (get_attr_mode (insn) == MODE_V2DF)
5568 return "xorpd\t%0, %0";
5569 else
5570 return "pxor\t%0, %0";
5571 case 2:
5572 return "pcmpeqd\t%0, %0";
5573 }
5574 gcc_unreachable ();
5575 }
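/* Note on the idioms above: xorps/xorpd/pxor of a register with itself
   yields all-zero bits, and pcmpeqd of a register with itself yields
   all-one bits, so the two "standard" SSE constants (0 and ~0) can be
   materialized without touching memory, which is why
   standard_sse_constant_p singles them out.  */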
5576
5577 /* Return 1 if OP contains a symbol reference. */
5578
5579 int
5580 symbolic_reference_mentioned_p (rtx op)
5581 {
5582 const char *fmt;
5583 int i;
5584
5585 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5586 return 1;
5587
5588 fmt = GET_RTX_FORMAT (GET_CODE (op));
5589 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5590 {
5591 if (fmt[i] == 'E')
5592 {
5593 int j;
5594
5595 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5596 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5597 return 1;
5598 }
5599
5600 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5601 return 1;
5602 }
5603
5604 return 0;
5605 }
5606
5607 /* Return 1 if it is appropriate to emit `ret' instructions in the
5608 body of a function. Do this only if the epilogue is simple, needing a
5609 couple of insns. Prior to reloading, we can't tell how many registers
5610 must be saved, so return 0 then. Return 0 if there is no frame
5611 marker to de-allocate. */
5612
5613 int
5614 ix86_can_use_return_insn_p (void)
5615 {
5616 struct ix86_frame frame;
5617
5618 if (! reload_completed || frame_pointer_needed)
5619 return 0;
5620
5621 /* Don't allow popping more than 32K bytes of arguments, since that's
5622 all we can do with one instruction. */
5623 if (current_function_pops_args
5624 && current_function_args_size >= 32768)
5625 return 0;
5626
5627 ix86_compute_frame_layout (&frame);
5628 return frame.to_allocate == 0 && frame.nregs == 0;
5629 }
5630 \f
5631 /* Value should be nonzero if functions must have frame pointers.
5632 Zero means the frame pointer need not be set up (and parms may
5633 be accessed via the stack pointer) in functions that seem suitable. */
5634
5635 int
5636 ix86_frame_pointer_required (void)
5637 {
5638 /* If we accessed previous frames, then the generated code expects
5639 to be able to access the saved ebp value in our frame. */
5640 if (cfun->machine->accesses_prev_frame)
5641 return 1;
5642
5643 /* Several x86 OSes need a frame pointer for other reasons,
5644 usually pertaining to setjmp. */
5645 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5646 return 1;
5647
5648 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5649 the frame pointer by default. Turn it back on now if this is
5650 not a leaf function. */
5651 if (TARGET_OMIT_LEAF_FRAME_POINTER
5652 && (!current_function_is_leaf
5653 || ix86_current_function_calls_tls_descriptor))
5654 return 1;
5655
5656 if (current_function_profile)
5657 return 1;
5658
5659 return 0;
5660 }
5661
5662 /* Record that the current function accesses previous call frames. */
5663
5664 void
5665 ix86_setup_frame_addresses (void)
5666 {
5667 cfun->machine->accesses_prev_frame = 1;
5668 }
5669 \f
5670 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5671 # define USE_HIDDEN_LINKONCE 1
5672 #else
5673 # define USE_HIDDEN_LINKONCE 0
5674 #endif
5675
5676 static int pic_labels_used;
5677
5678 /* Fills in the label name that should be used for a pc thunk for
5679 the given register. */
5680
5681 static void
5682 get_pc_thunk_name (char name[32], unsigned int regno)
5683 {
5684 gcc_assert (!TARGET_64BIT);
5685
5686 if (USE_HIDDEN_LINKONCE)
5687 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5688 else
5689 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5690 }
5691
5692
5693 /* This function generates the pc thunks used by -fpic code: each one
5694 loads its register with the return address of the caller and returns. */
5695
5696 void
5697 ix86_file_end (void)
5698 {
5699 rtx xops[2];
5700 int regno;
5701
5702 for (regno = 0; regno < 8; ++regno)
5703 {
5704 char name[32];
5705
5706 if (! ((pic_labels_used >> regno) & 1))
5707 continue;
5708
5709 get_pc_thunk_name (name, regno);
5710
5711 #if TARGET_MACHO
5712 if (TARGET_MACHO)
5713 {
5714 switch_to_section (darwin_sections[text_coal_section]);
5715 fputs ("\t.weak_definition\t", asm_out_file);
5716 assemble_name (asm_out_file, name);
5717 fputs ("\n\t.private_extern\t", asm_out_file);
5718 assemble_name (asm_out_file, name);
5719 fputs ("\n", asm_out_file);
5720 ASM_OUTPUT_LABEL (asm_out_file, name);
5721 }
5722 else
5723 #endif
5724 if (USE_HIDDEN_LINKONCE)
5725 {
5726 tree decl;
5727
5728 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5729 error_mark_node);
5730 TREE_PUBLIC (decl) = 1;
5731 TREE_STATIC (decl) = 1;
5732 DECL_ONE_ONLY (decl) = 1;
5733
5734 (*targetm.asm_out.unique_section) (decl, 0);
5735 switch_to_section (get_named_section (decl, NULL, 0));
5736
5737 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5738 fputs ("\t.hidden\t", asm_out_file);
5739 assemble_name (asm_out_file, name);
5740 fputc ('\n', asm_out_file);
5741 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5742 }
5743 else
5744 {
5745 switch_to_section (text_section);
5746 ASM_OUTPUT_LABEL (asm_out_file, name);
5747 }
5748
5749 xops[0] = gen_rtx_REG (SImode, regno);
5750 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5751 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5752 output_asm_insn ("ret", xops);
5753 }
5754
5755 if (NEED_INDICATE_EXEC_STACK)
5756 file_end_indicate_exec_stack ();
5757 }
5758
5759 /* Emit code for the SET_GOT patterns. */
5760
5761 const char *
5762 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5763 {
5764 rtx xops[3];
5765
5766 xops[0] = dest;
5767
5768 if (TARGET_VXWORKS_RTP && flag_pic)
5769 {
5770 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5771 xops[2] = gen_rtx_MEM (Pmode,
5772 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5773 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5774
5775 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5776 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5777 an unadorned address. */
5778 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5779 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5780 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5781 return "";
5782 }
5783
5784 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5785
5786 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5787 {
5788 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5789
5790 if (!flag_pic)
5791 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5792 else
5793 output_asm_insn ("call\t%a2", xops);
5794
5795 #if TARGET_MACHO
5796 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5797 is what will be referenced by the Mach-O PIC subsystem. */
5798 if (!label)
5799 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5800 #endif
5801
5802 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5803 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5804
5805 if (flag_pic)
5806 output_asm_insn ("pop{l}\t%0", xops);
5807 }
5808 else
5809 {
5810 char name[32];
5811 get_pc_thunk_name (name, REGNO (dest));
5812 pic_labels_used |= 1 << REGNO (dest);
5813
5814 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5815 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5816 output_asm_insn ("call\t%X2", xops);
5817 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5818 is what will be referenced by the Mach-O PIC subsystem. */
5819 #if TARGET_MACHO
5820 if (!label)
5821 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5822 else
5823 targetm.asm_out.internal_label (asm_out_file, "L",
5824 CODE_LABEL_NUMBER (label));
5825 #endif
5826 }
5827
5828 if (TARGET_MACHO)
5829 return "";
5830
5831 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5832 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5833 else
5834 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5835
5836 return "";
5837 }
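/* For reference, the two 32-bit PIC sequences produced above look roughly
   like this (register names are illustrative):

   without deep branch prediction:         with deep branch prediction:
        call  .L2                               call  __i686.get_pc_thunk.bx
   .L2: popl  %ebx                              addl  $_GLOBAL_OFFSET_TABLE_, %ebx
        addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   The thunk variant keeps the CPU's call/return predictor balanced, which
   is the point of TARGET_DEEP_BRANCH_PREDICTION.  */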
5838
5839 /* Generate a "push" pattern for input ARG. */
5840
5841 static rtx
5842 gen_push (rtx arg)
5843 {
5844 return gen_rtx_SET (VOIDmode,
5845 gen_rtx_MEM (Pmode,
5846 gen_rtx_PRE_DEC (Pmode,
5847 stack_pointer_rtx)),
5848 arg);
5849 }
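/* The RTL produced by gen_push is simply

       (set (mem:Pmode (pre_dec:Pmode (reg sp))) arg)

   i.e. a single push instruction; the prologue code below wraps it and
   marks it RTX_FRAME_RELATED_P so dwarf2out can emit CFI for it.  */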
5850
5851 /* Return the number of an unused call-clobbered register that is available
5852 for the entire function, or INVALID_REGNUM if there is none. */
5853
5854 static unsigned int
5855 ix86_select_alt_pic_regnum (void)
5856 {
5857 if (current_function_is_leaf && !current_function_profile
5858 && !ix86_current_function_calls_tls_descriptor)
5859 {
5860 int i;
5861 for (i = 2; i >= 0; --i)
5862 if (!df_regs_ever_live_p (i))
5863 return i;
5864 }
5865
5866 return INVALID_REGNUM;
5867 }
5868
5869 /* Return 1 if we need to save REGNO. */
5870 static int
5871 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5872 {
5873 if (pic_offset_table_rtx
5874 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5875 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5876 || current_function_profile
5877 || current_function_calls_eh_return
5878 || current_function_uses_const_pool))
5879 {
5880 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5881 return 0;
5882 return 1;
5883 }
5884
5885 if (current_function_calls_eh_return && maybe_eh_return)
5886 {
5887 unsigned i;
5888 for (i = 0; ; i++)
5889 {
5890 unsigned test = EH_RETURN_DATA_REGNO (i);
5891 if (test == INVALID_REGNUM)
5892 break;
5893 if (test == regno)
5894 return 1;
5895 }
5896 }
5897
5898 if (cfun->machine->force_align_arg_pointer
5899 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5900 return 1;
5901
5902 return (df_regs_ever_live_p (regno)
5903 && !call_used_regs[regno]
5904 && !fixed_regs[regno]
5905 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5906 }
5907
5908 /* Return number of registers to be saved on the stack. */
5909
5910 static int
5911 ix86_nsaved_regs (void)
5912 {
5913 int nregs = 0;
5914 int regno;
5915
5916 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5917 if (ix86_save_reg (regno, true))
5918 nregs++;
5919 return nregs;
5920 }
5921
5922 /* Return the offset between two registers, one to be eliminated, and the other
5923 its replacement, at the start of a routine. */
5924
5925 HOST_WIDE_INT
5926 ix86_initial_elimination_offset (int from, int to)
5927 {
5928 struct ix86_frame frame;
5929 ix86_compute_frame_layout (&frame);
5930
5931 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5932 return frame.hard_frame_pointer_offset;
5933 else if (from == FRAME_POINTER_REGNUM
5934 && to == HARD_FRAME_POINTER_REGNUM)
5935 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5936 else
5937 {
5938 gcc_assert (to == STACK_POINTER_REGNUM);
5939
5940 if (from == ARG_POINTER_REGNUM)
5941 return frame.stack_pointer_offset;
5942
5943 gcc_assert (from == FRAME_POINTER_REGNUM);
5944 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5945 }
5946 }
5947
5948 /* Fill the structure ix86_frame describing the frame of the function being compiled. */
5949
5950 static void
5951 ix86_compute_frame_layout (struct ix86_frame *frame)
5952 {
5953 HOST_WIDE_INT total_size;
5954 unsigned int stack_alignment_needed;
5955 HOST_WIDE_INT offset;
5956 unsigned int preferred_alignment;
5957 HOST_WIDE_INT size = get_frame_size ();
5958
5959 frame->nregs = ix86_nsaved_regs ();
5960 total_size = size;
5961
5962 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5963 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5964
5965 /* During reload iteration the number of saved registers can change.
5966 Recompute the value as needed. Do not recompute when the number of
5967 registers didn't change, as reload calls this function multiple times
5968 and does not expect the decision to change within a single iteration. */
5969 if (!optimize_size
5970 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5971 {
5972 int count = frame->nregs;
5973
5974 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5975 /* The fast prologue uses moves instead of pushes to save registers. This
5976 is significantly longer, but it also executes faster, as modern hardware
5977 can execute the moves in parallel, but can't do that for push/pop.
5978
5979 Be careful about choosing which prologue to emit: when the function takes
5980 many instructions to execute, we may use the slow version, and likewise
5981 when the function is known to be outside a hot spot (this is known only
5982 with profile feedback). Weight the size of the function by the number of
5983 registers to save, as it is cheap to use one or two push instructions but
5984 very slow to use many of them. */
5985 if (count)
5986 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5987 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5988 || (flag_branch_probabilities
5989 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5990 cfun->machine->use_fast_prologue_epilogue = false;
5991 else
5992 cfun->machine->use_fast_prologue_epilogue
5993 = !expensive_function_p (count);
5994 }
5995 if (TARGET_PROLOGUE_USING_MOVE
5996 && cfun->machine->use_fast_prologue_epilogue)
5997 frame->save_regs_using_mov = true;
5998 else
5999 frame->save_regs_using_mov = false;
6000
6001
6002 /* Skip return address and saved base pointer. */
6003 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6004
6005 frame->hard_frame_pointer_offset = offset;
6006
6007 /* Do some sanity checking of stack_alignment_needed and
6008 preferred_alignment, since the i386 port is the only one using these
6009 features and they may break easily. */
6010
6011 gcc_assert (!size || stack_alignment_needed);
6012 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6013 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6014 gcc_assert (stack_alignment_needed
6015 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6016
6017 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6018 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6019
6020 /* Register save area */
6021 offset += frame->nregs * UNITS_PER_WORD;
6022
6023 /* Va-arg area */
6024 if (ix86_save_varrargs_registers)
6025 {
6026 offset += X86_64_VARARGS_SIZE;
6027 frame->va_arg_size = X86_64_VARARGS_SIZE;
6028 }
6029 else
6030 frame->va_arg_size = 0;
6031
6032 /* Align start of frame for local function. */
6033 frame->padding1 = ((offset + stack_alignment_needed - 1)
6034 & -stack_alignment_needed) - offset;
6035
6036 offset += frame->padding1;
6037
6038 /* Frame pointer points here. */
6039 frame->frame_pointer_offset = offset;
6040
6041 offset += size;
6042
6043 /* Add the outgoing arguments area. It can be skipped if we eliminated
6044 all the function calls as dead code.
6045 Skipping is however impossible when the function calls alloca, as the
6046 alloca expander assumes that the last current_function_outgoing_args_size
6047 bytes of the stack frame are unused. */
6048 if (ACCUMULATE_OUTGOING_ARGS
6049 && (!current_function_is_leaf || current_function_calls_alloca
6050 || ix86_current_function_calls_tls_descriptor))
6051 {
6052 offset += current_function_outgoing_args_size;
6053 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6054 }
6055 else
6056 frame->outgoing_arguments_size = 0;
6057
6058 /* Align stack boundary. Only needed if we're calling another function
6059 or using alloca. */
6060 if (!current_function_is_leaf || current_function_calls_alloca
6061 || ix86_current_function_calls_tls_descriptor)
6062 frame->padding2 = ((offset + preferred_alignment - 1)
6063 & -preferred_alignment) - offset;
6064 else
6065 frame->padding2 = 0;
6066
6067 offset += frame->padding2;
6068
6069 /* We've reached end of stack frame. */
6070 frame->stack_pointer_offset = offset;
6071
6072 /* Size prologue needs to allocate. */
6073 frame->to_allocate =
6074 (size + frame->padding1 + frame->padding2
6075 + frame->outgoing_arguments_size + frame->va_arg_size);
6076
6077 if ((!frame->to_allocate && frame->nregs <= 1)
6078 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6079 frame->save_regs_using_mov = false;
6080
6081 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6082 && current_function_is_leaf
6083 && !ix86_current_function_calls_tls_descriptor)
6084 {
6085 frame->red_zone_size = frame->to_allocate;
6086 if (frame->save_regs_using_mov)
6087 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6088 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6089 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6090 }
6091 else
6092 frame->red_zone_size = 0;
6093 frame->to_allocate -= frame->red_zone_size;
6094 frame->stack_pointer_offset -= frame->red_zone_size;
6095 #if 0
6096 fprintf (stderr, "\n");
6097 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6098 fprintf (stderr, "size: %ld\n", (long)size);
6099 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6100 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6101 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6102 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6103 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6104 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6105 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6106 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6107 (long)frame->hard_frame_pointer_offset);
6108 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6109 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6110 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6111 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6112 #endif
6113 }
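/* A rough picture of the frame laid out above (addresses grow downward;
   the names are the struct ix86_frame members computed by this function):

       <- incoming stack pointer
       return address                       \  hard_frame_pointer_offset
       saved frame pointer (if needed)      /
       saved registers (nregs words)
       va_arg save area (va_arg_size)
       padding1
       <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments (outgoing_arguments_size)
       padding2
       <- stack_pointer_offset (reduced by red_zone_size when the red zone is used)  */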
6114
6115 /* Emit code to save registers in the prologue. */
6116
6117 static void
6118 ix86_emit_save_regs (void)
6119 {
6120 unsigned int regno;
6121 rtx insn;
6122
6123 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6124 if (ix86_save_reg (regno, true))
6125 {
6126 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6127 RTX_FRAME_RELATED_P (insn) = 1;
6128 }
6129 }
6130
6131 /* Emit code to save registers using MOV insns. The first register
6132 is stored at POINTER + OFFSET. */
6133 static void
6134 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6135 {
6136 unsigned int regno;
6137 rtx insn;
6138
6139 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6140 if (ix86_save_reg (regno, true))
6141 {
6142 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6143 Pmode, offset),
6144 gen_rtx_REG (Pmode, regno));
6145 RTX_FRAME_RELATED_P (insn) = 1;
6146 offset += UNITS_PER_WORD;
6147 }
6148 }
6149
6150 /* Expand a prologue or epilogue stack adjustment.
6151 The pattern exists to put a dependency on all ebp-based memory accesses.
6152 STYLE should be negative if instructions should be marked as frame related,
6153 zero if the %r11 register is live and cannot be freely used, and positive
6154 otherwise. */
6155
6156 static void
6157 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6158 {
6159 rtx insn;
6160
6161 if (! TARGET_64BIT)
6162 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6163 else if (x86_64_immediate_operand (offset, DImode))
6164 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6165 else
6166 {
6167 rtx r11;
6168 /* r11 is used by indirect sibcall return as well, set before the
6169 epilogue and used after the epilogue. At the moment an indirect
6170 sibcall shouldn't be used together with huge frame sizes in one
6171 function because of the frame_size check in sibcall.c. */
6172 gcc_assert (style);
6173 r11 = gen_rtx_REG (DImode, R11_REG);
6174 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6175 if (style < 0)
6176 RTX_FRAME_RELATED_P (insn) = 1;
6177 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6178 offset));
6179 }
6180 if (style < 0)
6181 RTX_FRAME_RELATED_P (insn) = 1;
6182 }
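/* The r11 path above exists because 64-bit add/sub patterns only accept a
   sign-extended 32-bit immediate; a larger adjustment first materializes the
   offset in a register, giving roughly (as an illustration)

       movabs  $offset, %r11
       add     %r11, %rsp

   which is why the caller must guarantee that %r11 is free (style != 0).  */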
6183
6184 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6185
6186 static rtx
6187 ix86_internal_arg_pointer (void)
6188 {
6189 bool has_force_align_arg_pointer =
6190 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6191 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6192 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6193 && DECL_NAME (current_function_decl)
6194 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6195 && DECL_FILE_SCOPE_P (current_function_decl))
6196 || ix86_force_align_arg_pointer
6197 || has_force_align_arg_pointer)
6198 {
6199 /* Nested functions can't realign the stack due to a register
6200 conflict. */
6201 if (DECL_CONTEXT (current_function_decl)
6202 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6203 {
6204 if (ix86_force_align_arg_pointer)
6205 warning (0, "-mstackrealign ignored for nested functions");
6206 if (has_force_align_arg_pointer)
6207 error ("%s not supported for nested functions",
6208 ix86_force_align_arg_pointer_string);
6209 return virtual_incoming_args_rtx;
6210 }
6211 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6212 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6213 }
6214 else
6215 return virtual_incoming_args_rtx;
6216 }
6217
6218 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6219 This is called from dwarf2out.c to emit call frame instructions
6220 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6221 static void
6222 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6223 {
6224 rtx unspec = SET_SRC (pattern);
6225 gcc_assert (GET_CODE (unspec) == UNSPEC);
6226
6227 switch (index)
6228 {
6229 case UNSPEC_REG_SAVE:
6230 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6231 SET_DEST (pattern));
6232 break;
6233 case UNSPEC_DEF_CFA:
6234 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6235 INTVAL (XVECEXP (unspec, 0, 0)));
6236 break;
6237 default:
6238 gcc_unreachable ();
6239 }
6240 }
6241
6242 /* Expand the prologue into a bunch of separate insns. */
6243
6244 void
6245 ix86_expand_prologue (void)
6246 {
6247 rtx insn;
6248 bool pic_reg_used;
6249 struct ix86_frame frame;
6250 HOST_WIDE_INT allocate;
6251
6252 ix86_compute_frame_layout (&frame);
6253
6254 if (cfun->machine->force_align_arg_pointer)
6255 {
6256 rtx x, y;
6257
6258 /* Grab the argument pointer. */
6259 x = plus_constant (stack_pointer_rtx, 4);
6260 y = cfun->machine->force_align_arg_pointer;
6261 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6262 RTX_FRAME_RELATED_P (insn) = 1;
6263
6264 /* The unwind info consists of two parts: install the fafp as the cfa,
6265 and record the fafp as the "save register" of the stack pointer.
6266 The latter is there so that the unwinder can see where it should
6267 restore the stack pointer across the stack-aligning and insn. */
6268 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6269 x = gen_rtx_SET (VOIDmode, y, x);
6270 RTX_FRAME_RELATED_P (x) = 1;
6271 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6272 UNSPEC_REG_SAVE);
6273 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6274 RTX_FRAME_RELATED_P (y) = 1;
6275 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6276 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6277 REG_NOTES (insn) = x;
6278
6279 /* Align the stack. */
6280 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6281 GEN_INT (-16)));
6282
6283 /* And here we cheat like madmen with the unwind info. We force the
6284 cfa register back to sp+4, which is exactly what it was at the
6285 start of the function. Re-pushing the return address results in
6286 the return address being at the same spot relative to the cfa, and
6287 thus is correct wrt the unwind info. */
6288 x = cfun->machine->force_align_arg_pointer;
6289 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6290 insn = emit_insn (gen_push (x));
6291 RTX_FRAME_RELATED_P (insn) = 1;
6292
6293 x = GEN_INT (4);
6294 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6295 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6296 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6297 REG_NOTES (insn) = x;
6298 }
6299
6300 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6301 slower on all targets. Also sdb doesn't like it. */
6302
6303 if (frame_pointer_needed)
6304 {
6305 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6306 RTX_FRAME_RELATED_P (insn) = 1;
6307
6308 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6309 RTX_FRAME_RELATED_P (insn) = 1;
6310 }
6311
6312 allocate = frame.to_allocate;
6313
6314 if (!frame.save_regs_using_mov)
6315 ix86_emit_save_regs ();
6316 else
6317 allocate += frame.nregs * UNITS_PER_WORD;
6318
6319 /* When using the red zone we may start saving registers before allocating
6320 the stack frame, saving one cycle of the prologue. */
6321 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6322 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6323 : stack_pointer_rtx,
6324 -frame.nregs * UNITS_PER_WORD);
6325
6326 if (allocate == 0)
6327 ;
6328 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6329 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6330 GEN_INT (-allocate), -1);
6331 else
6332 {
6333 /* Only valid for Win32. */
6334 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6335 bool eax_live;
6336 rtx t;
6337
6338 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6339
6340 if (TARGET_64BIT_MS_ABI)
6341 eax_live = false;
6342 else
6343 eax_live = ix86_eax_live_at_start_p ();
6344
6345 if (eax_live)
6346 {
6347 emit_insn (gen_push (eax));
6348 allocate -= UNITS_PER_WORD;
6349 }
6350
6351 emit_move_insn (eax, GEN_INT (allocate));
6352
6353 if (TARGET_64BIT)
6354 insn = gen_allocate_stack_worker_64 (eax);
6355 else
6356 insn = gen_allocate_stack_worker_32 (eax);
6357 insn = emit_insn (insn);
6358 RTX_FRAME_RELATED_P (insn) = 1;
6359 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6360 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6361 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6362 t, REG_NOTES (insn));
6363
6364 if (eax_live)
6365 {
6366 if (frame_pointer_needed)
6367 t = plus_constant (hard_frame_pointer_rtx,
6368 allocate
6369 - frame.to_allocate
6370 - frame.nregs * UNITS_PER_WORD);
6371 else
6372 t = plus_constant (stack_pointer_rtx, allocate);
6373 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6374 }
6375 }
6376
6377 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6378 {
6379 if (!frame_pointer_needed || !frame.to_allocate)
6380 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6381 else
6382 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6383 -frame.nregs * UNITS_PER_WORD);
6384 }
6385
6386 pic_reg_used = false;
6387 if (pic_offset_table_rtx
6388 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6389 || current_function_profile))
6390 {
6391 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6392
6393 if (alt_pic_reg_used != INVALID_REGNUM)
6394 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6395
6396 pic_reg_used = true;
6397 }
6398
6399 if (pic_reg_used)
6400 {
6401 if (TARGET_64BIT)
6402 {
6403 if (ix86_cmodel == CM_LARGE_PIC)
6404 {
6405 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6406 rtx label = gen_label_rtx ();
6407 emit_label (label);
6408 LABEL_PRESERVE_P (label) = 1;
6409 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6410 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6411 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6412 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6413 pic_offset_table_rtx, tmp_reg));
6414 }
6415 else
6416 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6417 }
6418 else
6419 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6420 }
6421
6422 /* Prevent function calls from being scheduled before the call to mcount.
6423 In the pic_reg_used case, make sure that the got load isn't deleted. */
6424 if (current_function_profile)
6425 {
6426 if (pic_reg_used)
6427 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6428 emit_insn (gen_blockage ());
6429 }
6430 }
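/* Putting the pieces above together, a typical 32-bit prologue with a frame
   pointer and a couple of saved registers comes out as something like
   (illustrative only):

       push   %ebp
       mov    %esp, %ebp
       push   %edi
       push   %esi
       sub    $N, %esp          ; N == frame.to_allocate

   with the "fast" variant replacing the pushes by moves into the already
   allocated frame, and the PIC / profiling cases appending the set_got and
   blockage insns emitted at the end of the function above.  */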
6431
6432 /* Emit code to restore saved registers using MOV insns. First register
6433 is restored from POINTER + OFFSET. */
6434 static void
6435 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6436 int maybe_eh_return)
6437 {
6438 int regno;
6439 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6440
6441 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6442 if (ix86_save_reg (regno, maybe_eh_return))
6443 {
6444 /* Ensure that adjust_address won't be forced to produce a pointer
6445 outside the range allowed by the x86-64 instruction set. */
6446 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6447 {
6448 rtx r11;
6449
6450 r11 = gen_rtx_REG (DImode, R11_REG);
6451 emit_move_insn (r11, GEN_INT (offset));
6452 emit_insn (gen_adddi3 (r11, r11, pointer));
6453 base_address = gen_rtx_MEM (Pmode, r11);
6454 offset = 0;
6455 }
6456 emit_move_insn (gen_rtx_REG (Pmode, regno),
6457 adjust_address (base_address, Pmode, offset));
6458 offset += UNITS_PER_WORD;
6459 }
6460 }
6461
6462 /* Restore function stack, frame, and registers. */
6463
6464 void
6465 ix86_expand_epilogue (int style)
6466 {
6467 int regno;
6468 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6469 struct ix86_frame frame;
6470 HOST_WIDE_INT offset;
6471
6472 ix86_compute_frame_layout (&frame);
6473
6474 /* Calculate start of saved registers relative to ebp. Special care
6475 must be taken for the normal return case of a function using
6476 eh_return: the eax and edx registers are marked as saved, but not
6477 restored along this path. */
6478 offset = frame.nregs;
6479 if (current_function_calls_eh_return && style != 2)
6480 offset -= 2;
6481 offset *= -UNITS_PER_WORD;
6482
6483 /* If we're only restoring one register and sp is not valid then
6484 use a move instruction to restore the register, since it's
6485 less work than reloading sp and popping the register.
6486
6487 The default code results in a stack adjustment using an add/lea instruction,
6488 while this code results in a LEAVE instruction (or its discrete equivalent),
6489 so it is profitable in some other cases as well, especially when there
6490 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6491 and there is exactly one register to pop. This heuristic may need some
6492 tuning in the future. */
6493 if ((!sp_valid && frame.nregs <= 1)
6494 || (TARGET_EPILOGUE_USING_MOVE
6495 && cfun->machine->use_fast_prologue_epilogue
6496 && (frame.nregs > 1 || frame.to_allocate))
6497 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6498 || (frame_pointer_needed && TARGET_USE_LEAVE
6499 && cfun->machine->use_fast_prologue_epilogue
6500 && frame.nregs == 1)
6501 || current_function_calls_eh_return)
6502 {
6503 /* Restore registers. We can use ebp or esp to address the memory
6504 locations. If both are available, default to ebp, since offsets
6505 are known to be small. The only exception is esp pointing directly
6506 to the end of the block of saved registers, where we may simplify
6507 the addressing mode. */
6508
6509 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6510 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6511 frame.to_allocate, style == 2);
6512 else
6513 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6514 offset, style == 2);
6515
6516 /* eh_return epilogues need %ecx added to the stack pointer. */
6517 if (style == 2)
6518 {
6519 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6520
6521 if (frame_pointer_needed)
6522 {
6523 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6524 tmp = plus_constant (tmp, UNITS_PER_WORD);
6525 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6526
6527 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6528 emit_move_insn (hard_frame_pointer_rtx, tmp);
6529
6530 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6531 const0_rtx, style);
6532 }
6533 else
6534 {
6535 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6536 tmp = plus_constant (tmp, (frame.to_allocate
6537 + frame.nregs * UNITS_PER_WORD));
6538 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6539 }
6540 }
6541 else if (!frame_pointer_needed)
6542 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6543 GEN_INT (frame.to_allocate
6544 + frame.nregs * UNITS_PER_WORD),
6545 style);
6546 /* If not an i386, mov & pop is faster than "leave". */
6547 else if (TARGET_USE_LEAVE || optimize_size
6548 || !cfun->machine->use_fast_prologue_epilogue)
6549 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6550 else
6551 {
6552 pro_epilogue_adjust_stack (stack_pointer_rtx,
6553 hard_frame_pointer_rtx,
6554 const0_rtx, style);
6555 if (TARGET_64BIT)
6556 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6557 else
6558 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6559 }
6560 }
6561 else
6562 {
6563 /* First step is to deallocate the stack frame so that we can
6564 pop the registers. */
6565 if (!sp_valid)
6566 {
6567 gcc_assert (frame_pointer_needed);
6568 pro_epilogue_adjust_stack (stack_pointer_rtx,
6569 hard_frame_pointer_rtx,
6570 GEN_INT (offset), style);
6571 }
6572 else if (frame.to_allocate)
6573 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6574 GEN_INT (frame.to_allocate), style);
6575
6576 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6577 if (ix86_save_reg (regno, false))
6578 {
6579 if (TARGET_64BIT)
6580 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6581 else
6582 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6583 }
6584 if (frame_pointer_needed)
6585 {
6586 /* Leave results in shorter dependency chains on CPUs that are
6587 able to grok it fast. */
6588 if (TARGET_USE_LEAVE)
6589 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6590 else if (TARGET_64BIT)
6591 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6592 else
6593 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6594 }
6595 }
6596
6597 if (cfun->machine->force_align_arg_pointer)
6598 {
6599 emit_insn (gen_addsi3 (stack_pointer_rtx,
6600 cfun->machine->force_align_arg_pointer,
6601 GEN_INT (-4)));
6602 }
6603
6604 /* Sibcall epilogues don't want a return instruction. */
6605 if (style == 0)
6606 return;
6607
6608 if (current_function_pops_args && current_function_args_size)
6609 {
6610 rtx popc = GEN_INT (current_function_pops_args);
6611
6612 /* i386 can only pop 64K bytes. If asked to pop more, pop
6613 return address, do explicit add, and jump indirectly to the
6614 caller. */
6615
6616 if (current_function_pops_args >= 65536)
6617 {
6618 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6619
6620 /* There is no "pascal" calling convention in any 64bit ABI. */
6621 gcc_assert (!TARGET_64BIT);
6622
6623 emit_insn (gen_popsi1 (ecx));
6624 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6625 emit_jump_insn (gen_return_indirect_internal (ecx));
6626 }
6627 else
6628 emit_jump_insn (gen_return_pop_internal (popc));
6629 }
6630 else
6631 emit_jump_insn (gen_return_internal ());
6632 }
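/* The STYLE argument used throughout ix86_expand_epilogue distinguishes the
   three callers: 0 for sibcall epilogues (no return insn is emitted), 1 for
   ordinary epilogues, and 2 for eh_return epilogues, which additionally add
   EH_RETURN_STACKADJ_RTX (%ecx) into the stack pointer as handled above.  */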
6633
6634 /* Reset from the function's potential modifications. */
6635
6636 static void
6637 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6638 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6639 {
6640 if (pic_offset_table_rtx)
6641 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6642 #if TARGET_MACHO
6643 /* Mach-O doesn't support labels at the end of objects, so if
6644 it looks like we might want one, insert a NOP. */
6645 {
6646 rtx insn = get_last_insn ();
6647 while (insn
6648 && NOTE_P (insn)
6649 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6650 insn = PREV_INSN (insn);
6651 if (insn
6652 && (LABEL_P (insn)
6653 || (NOTE_P (insn)
6654 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6655 fputs ("\tnop\n", file);
6656 }
6657 #endif
6658
6659 }
6660 \f
6661 /* Extract the parts of an RTL expression that is a valid memory address
6662 for an instruction. Return 0 if the structure of the address is
6663 grossly off. Return -1 if the address contains ASHIFT, so it is not
6664 strictly valid, but is still used for computing the length of a lea instruction. */
6665
6666 int
6667 ix86_decompose_address (rtx addr, struct ix86_address *out)
6668 {
6669 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6670 rtx base_reg, index_reg;
6671 HOST_WIDE_INT scale = 1;
6672 rtx scale_rtx = NULL_RTX;
6673 int retval = 1;
6674 enum ix86_address_seg seg = SEG_DEFAULT;
6675
6676 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6677 base = addr;
6678 else if (GET_CODE (addr) == PLUS)
6679 {
6680 rtx addends[4], op;
6681 int n = 0, i;
6682
6683 op = addr;
6684 do
6685 {
6686 if (n >= 4)
6687 return 0;
6688 addends[n++] = XEXP (op, 1);
6689 op = XEXP (op, 0);
6690 }
6691 while (GET_CODE (op) == PLUS);
6692 if (n >= 4)
6693 return 0;
6694 addends[n] = op;
6695
6696 for (i = n; i >= 0; --i)
6697 {
6698 op = addends[i];
6699 switch (GET_CODE (op))
6700 {
6701 case MULT:
6702 if (index)
6703 return 0;
6704 index = XEXP (op, 0);
6705 scale_rtx = XEXP (op, 1);
6706 break;
6707
6708 case UNSPEC:
6709 if (XINT (op, 1) == UNSPEC_TP
6710 && TARGET_TLS_DIRECT_SEG_REFS
6711 && seg == SEG_DEFAULT)
6712 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6713 else
6714 return 0;
6715 break;
6716
6717 case REG:
6718 case SUBREG:
6719 if (!base)
6720 base = op;
6721 else if (!index)
6722 index = op;
6723 else
6724 return 0;
6725 break;
6726
6727 case CONST:
6728 case CONST_INT:
6729 case SYMBOL_REF:
6730 case LABEL_REF:
6731 if (disp)
6732 return 0;
6733 disp = op;
6734 break;
6735
6736 default:
6737 return 0;
6738 }
6739 }
6740 }
6741 else if (GET_CODE (addr) == MULT)
6742 {
6743 index = XEXP (addr, 0); /* index*scale */
6744 scale_rtx = XEXP (addr, 1);
6745 }
6746 else if (GET_CODE (addr) == ASHIFT)
6747 {
6748 rtx tmp;
6749
6750 /* We're called for lea too, which implements ashift on occasion. */
6751 index = XEXP (addr, 0);
6752 tmp = XEXP (addr, 1);
6753 if (!CONST_INT_P (tmp))
6754 return 0;
6755 scale = INTVAL (tmp);
6756 if ((unsigned HOST_WIDE_INT) scale > 3)
6757 return 0;
6758 scale = 1 << scale;
6759 retval = -1;
6760 }
6761 else
6762 disp = addr; /* displacement */
6763
6764 /* Extract the integral value of scale. */
6765 if (scale_rtx)
6766 {
6767 if (!CONST_INT_P (scale_rtx))
6768 return 0;
6769 scale = INTVAL (scale_rtx);
6770 }
6771
6772 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6773 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6774
6775 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6776 if (base_reg && index_reg && scale == 1
6777 && (index_reg == arg_pointer_rtx
6778 || index_reg == frame_pointer_rtx
6779 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6780 {
6781 rtx tmp;
6782 tmp = base, base = index, index = tmp;
6783 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6784 }
6785
6786 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6787 if ((base_reg == hard_frame_pointer_rtx
6788 || base_reg == frame_pointer_rtx
6789 || base_reg == arg_pointer_rtx) && !disp)
6790 disp = const0_rtx;
6791
6792 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6793 Avoid this by transforming to [%esi+0]. */
6794 if (TARGET_K6 && !optimize_size
6795 && base_reg && !index_reg && !disp
6796 && REG_P (base_reg)
6797 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6798 disp = const0_rtx;
6799
6800 /* Special case: encode reg+reg instead of reg*2. */
6801 if (!base && index && scale && scale == 2)
6802 base = index, base_reg = index_reg, scale = 1;
6803
6804 /* Special case: scaling cannot be encoded without base or displacement. */
6805 if (!base && !disp && index && scale != 1)
6806 disp = const0_rtx;
6807
6808 out->base = base;
6809 out->index = index;
6810 out->disp = disp;
6811 out->scale = scale;
6812 out->seg = seg;
6813
6814 return retval;
6815 }
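/* For illustration: an address in canonical form such as
     (plus (plus (mult (reg:SI ax) (const_int 4)) (reg:SI bx))
           (const_int 16))
   -- the operand 16(%ebx,%eax,4) in AT&T syntax -- decomposes roughly
   into base = bx, index = ax, scale = 4, disp = 16, seg = SEG_DEFAULT,
   with a return value of 1.  */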
6816 \f
6817 /* Return the cost of the memory address x.
6818 For i386, it is better to use a complex address than to let gcc copy
6819 the address into a reg and make a new pseudo.  But not if the address
6820 requires two regs - that would mean more pseudos with longer
6821 lifetimes. */
6822 static int
6823 ix86_address_cost (rtx x)
6824 {
6825 struct ix86_address parts;
6826 int cost = 1;
6827 int ok = ix86_decompose_address (x, &parts);
6828
6829 gcc_assert (ok);
6830
6831 if (parts.base && GET_CODE (parts.base) == SUBREG)
6832 parts.base = SUBREG_REG (parts.base);
6833 if (parts.index && GET_CODE (parts.index) == SUBREG)
6834 parts.index = SUBREG_REG (parts.index);
6835
6836 /* Attempt to minimize number of registers in the address. */
6837 if ((parts.base
6838 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6839 || (parts.index
6840 && (!REG_P (parts.index)
6841 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6842 cost++;
6843
6844 if (parts.base
6845 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6846 && parts.index
6847 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6848 && parts.base != parts.index)
6849 cost++;
6850
6851 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6852 since its predecode logic can't detect the length of instructions
6853 and decoding degenerates to vector decoding.  Increase the cost of such
6854 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6855 to split such addresses or even refuse such addresses at all.
6856
6857 The following addressing modes are affected:
6858 [base+scale*index]
6859 [scale*index+disp]
6860 [base+index]
6861
6862 The first and last cases may be avoidable by explicitly coding a zero
6863 displacement in the memory address, but I don't have an AMD-K6 machine
6864 handy to check this theory. */
6865
6866 if (TARGET_K6
6867 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6868 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6869 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6870 cost += 10;
6871
6872 return cost;
6873 }
6874 \f
6875 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6876 this is used to form addresses to local data when -fPIC is in
6877 use. */
6878
6879 static bool
6880 darwin_local_data_pic (rtx disp)
6881 {
6882 if (GET_CODE (disp) == MINUS)
6883 {
6884 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6885 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6886 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6887 {
6888 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6889 if (! strcmp (sym_name, "<pic base>"))
6890 return true;
6891 }
6892 }
6893
6894 return false;
6895 }
6896
6897 /* Determine if a given RTX is a valid constant. We already know this
6898 satisfies CONSTANT_P. */
6899
6900 bool
6901 legitimate_constant_p (rtx x)
6902 {
6903 switch (GET_CODE (x))
6904 {
6905 case CONST:
6906 x = XEXP (x, 0);
6907
6908 if (GET_CODE (x) == PLUS)
6909 {
6910 if (!CONST_INT_P (XEXP (x, 1)))
6911 return false;
6912 x = XEXP (x, 0);
6913 }
6914
6915 if (TARGET_MACHO && darwin_local_data_pic (x))
6916 return true;
6917
6918 /* Only some unspecs are valid as "constants". */
6919 if (GET_CODE (x) == UNSPEC)
6920 switch (XINT (x, 1))
6921 {
6922 case UNSPEC_GOT:
6923 case UNSPEC_GOTOFF:
6924 case UNSPEC_PLTOFF:
6925 return TARGET_64BIT;
6926 case UNSPEC_TPOFF:
6927 case UNSPEC_NTPOFF:
6928 x = XVECEXP (x, 0, 0);
6929 return (GET_CODE (x) == SYMBOL_REF
6930 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6931 case UNSPEC_DTPOFF:
6932 x = XVECEXP (x, 0, 0);
6933 return (GET_CODE (x) == SYMBOL_REF
6934 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6935 default:
6936 return false;
6937 }
6938
6939 /* We must have drilled down to a symbol. */
6940 if (GET_CODE (x) == LABEL_REF)
6941 return true;
6942 if (GET_CODE (x) != SYMBOL_REF)
6943 return false;
6944 /* FALLTHRU */
6945
6946 case SYMBOL_REF:
6947 /* TLS symbols are never valid. */
6948 if (SYMBOL_REF_TLS_MODEL (x))
6949 return false;
6950
6951 /* DLLIMPORT symbols are never valid. */
6952 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6953 && SYMBOL_REF_DLLIMPORT_P (x))
6954 return false;
6955 break;
6956
6957 case CONST_DOUBLE:
6958 if (GET_MODE (x) == TImode
6959 && x != CONST0_RTX (TImode)
6960 && !TARGET_64BIT)
6961 return false;
6962 break;
6963
6964 case CONST_VECTOR:
6965 if (x == CONST0_RTX (GET_MODE (x)))
6966 return true;
6967 return false;
6968
6969 default:
6970 break;
6971 }
6972
6973 /* Otherwise we handle everything else in the move patterns. */
6974 return true;
6975 }
6976
6977 /* Determine if it's legal to put X into the constant pool. This
6978 is not possible for the address of thread-local symbols, which
6979 is checked above. */
6980
6981 static bool
6982 ix86_cannot_force_const_mem (rtx x)
6983 {
6984 /* We can always put integral constants and vectors in memory. */
6985 switch (GET_CODE (x))
6986 {
6987 case CONST_INT:
6988 case CONST_DOUBLE:
6989 case CONST_VECTOR:
6990 return false;
6991
6992 default:
6993 break;
6994 }
6995 return !legitimate_constant_p (x);
6996 }
6997
6998 /* Determine if a given RTX is a valid constant address. */
6999
7000 bool
7001 constant_address_p (rtx x)
7002 {
7003 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7004 }
7005
7006 /* Nonzero if the constant value X is a legitimate general operand
7007 when generating PIC code. It is given that flag_pic is on and
7008 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7009
7010 bool
7011 legitimate_pic_operand_p (rtx x)
7012 {
7013 rtx inner;
7014
7015 switch (GET_CODE (x))
7016 {
7017 case CONST:
7018 inner = XEXP (x, 0);
7019 if (GET_CODE (inner) == PLUS
7020 && CONST_INT_P (XEXP (inner, 1)))
7021 inner = XEXP (inner, 0);
7022
7023 /* Only some unspecs are valid as "constants". */
7024 if (GET_CODE (inner) == UNSPEC)
7025 switch (XINT (inner, 1))
7026 {
7027 case UNSPEC_GOT:
7028 case UNSPEC_GOTOFF:
7029 case UNSPEC_PLTOFF:
7030 return TARGET_64BIT;
7031 case UNSPEC_TPOFF:
7032 x = XVECEXP (inner, 0, 0);
7033 return (GET_CODE (x) == SYMBOL_REF
7034 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7035 default:
7036 return false;
7037 }
7038 /* FALLTHRU */
7039
7040 case SYMBOL_REF:
7041 case LABEL_REF:
7042 return legitimate_pic_address_disp_p (x);
7043
7044 default:
7045 return true;
7046 }
7047 }
7048
7049 /* Determine if a given CONST RTX is a valid memory displacement
7050 in PIC mode. */
7051
7052 int
7053 legitimate_pic_address_disp_p (rtx disp)
7054 {
7055 bool saw_plus;
7056
7057 /* In 64bit mode we can allow direct addresses of symbols and labels
7058 when they are not dynamic symbols. */
7059 if (TARGET_64BIT)
7060 {
7061 rtx op0 = disp, op1;
7062
7063 switch (GET_CODE (disp))
7064 {
7065 case LABEL_REF:
7066 return true;
7067
7068 case CONST:
7069 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7070 break;
7071 op0 = XEXP (XEXP (disp, 0), 0);
7072 op1 = XEXP (XEXP (disp, 0), 1);
7073 if (!CONST_INT_P (op1)
7074 || INTVAL (op1) >= 16*1024*1024
7075 || INTVAL (op1) < -16*1024*1024)
7076 break;
7077 if (GET_CODE (op0) == LABEL_REF)
7078 return true;
7079 if (GET_CODE (op0) != SYMBOL_REF)
7080 break;
7081 /* FALLTHRU */
7082
7083 case SYMBOL_REF:
7084 /* TLS references should always be enclosed in UNSPEC. */
7085 if (SYMBOL_REF_TLS_MODEL (op0))
7086 return false;
7087 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7088 && ix86_cmodel != CM_LARGE_PIC)
7089 return true;
7090 break;
7091
7092 default:
7093 break;
7094 }
7095 }
7096 if (GET_CODE (disp) != CONST)
7097 return 0;
7098 disp = XEXP (disp, 0);
7099
7100 if (TARGET_64BIT)
7101 {
7102 /* It is unsafe to allow PLUS expressions.  This would limit the allowed
7103 distance of GOT tables.  We should not need these anyway. */
7104 if (GET_CODE (disp) != UNSPEC
7105 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7106 && XINT (disp, 1) != UNSPEC_GOTOFF
7107 && XINT (disp, 1) != UNSPEC_PLTOFF))
7108 return 0;
7109
7110 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7111 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7112 return 0;
7113 return 1;
7114 }
7115
7116 saw_plus = false;
7117 if (GET_CODE (disp) == PLUS)
7118 {
7119 if (!CONST_INT_P (XEXP (disp, 1)))
7120 return 0;
7121 disp = XEXP (disp, 0);
7122 saw_plus = true;
7123 }
7124
7125 if (TARGET_MACHO && darwin_local_data_pic (disp))
7126 return 1;
7127
7128 if (GET_CODE (disp) != UNSPEC)
7129 return 0;
7130
7131 switch (XINT (disp, 1))
7132 {
7133 case UNSPEC_GOT:
7134 if (saw_plus)
7135 return false;
7136 /* We need to check for both symbols and labels because VxWorks loads
7137 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7138 details. */
7139 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7140 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7141 case UNSPEC_GOTOFF:
7142 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7143 While the ABI also specifies a 32bit relocation, we don't produce it in
7144 the small PIC model at all. */
7145 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7146 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7147 && !TARGET_64BIT)
7148 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7149 return false;
7150 case UNSPEC_GOTTPOFF:
7151 case UNSPEC_GOTNTPOFF:
7152 case UNSPEC_INDNTPOFF:
7153 if (saw_plus)
7154 return false;
7155 disp = XVECEXP (disp, 0, 0);
7156 return (GET_CODE (disp) == SYMBOL_REF
7157 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7158 case UNSPEC_NTPOFF:
7159 disp = XVECEXP (disp, 0, 0);
7160 return (GET_CODE (disp) == SYMBOL_REF
7161 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7162 case UNSPEC_DTPOFF:
7163 disp = XVECEXP (disp, 0, 0);
7164 return (GET_CODE (disp) == SYMBOL_REF
7165 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7166 }
7167
7168 return 0;
7169 }
7170
7171 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7172 memory address for an instruction. The MODE argument is the machine mode
7173 for the MEM expression that wants to use this address.
7174
7175 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
7176 convert common non-canonical forms to canonical form so that they will
7177 be recognized. */
7178
7179 int
7180 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7181 rtx addr, int strict)
7182 {
7183 struct ix86_address parts;
7184 rtx base, index, disp;
7185 HOST_WIDE_INT scale;
7186 const char *reason = NULL;
7187 rtx reason_rtx = NULL_RTX;
7188
7189 if (ix86_decompose_address (addr, &parts) <= 0)
7190 {
7191 reason = "decomposition failed";
7192 goto report_error;
7193 }
7194
7195 base = parts.base;
7196 index = parts.index;
7197 disp = parts.disp;
7198 scale = parts.scale;
7199
7200 /* Validate base register.
7201
7202 Don't allow SUBREGs that span more than a word here.  They can lead to spill
7203 failures when the base is one word out of a two-word structure, which is
7204 represented internally as a DImode int. */
7205
7206 if (base)
7207 {
7208 rtx reg;
7209 reason_rtx = base;
7210
7211 if (REG_P (base))
7212 reg = base;
7213 else if (GET_CODE (base) == SUBREG
7214 && REG_P (SUBREG_REG (base))
7215 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7216 <= UNITS_PER_WORD)
7217 reg = SUBREG_REG (base);
7218 else
7219 {
7220 reason = "base is not a register";
7221 goto report_error;
7222 }
7223
7224 if (GET_MODE (base) != Pmode)
7225 {
7226 reason = "base is not in Pmode";
7227 goto report_error;
7228 }
7229
7230 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7231 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7232 {
7233 reason = "base is not valid";
7234 goto report_error;
7235 }
7236 }
7237
7238 /* Validate index register.
7239
7240 Don't allow SUBREGs that span more than a word here -- same as above. */
7241
7242 if (index)
7243 {
7244 rtx reg;
7245 reason_rtx = index;
7246
7247 if (REG_P (index))
7248 reg = index;
7249 else if (GET_CODE (index) == SUBREG
7250 && REG_P (SUBREG_REG (index))
7251 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7252 <= UNITS_PER_WORD)
7253 reg = SUBREG_REG (index);
7254 else
7255 {
7256 reason = "index is not a register";
7257 goto report_error;
7258 }
7259
7260 if (GET_MODE (index) != Pmode)
7261 {
7262 reason = "index is not in Pmode";
7263 goto report_error;
7264 }
7265
7266 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7267 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7268 {
7269 reason = "index is not valid";
7270 goto report_error;
7271 }
7272 }
7273
7274 /* Validate scale factor. */
7275 if (scale != 1)
7276 {
7277 reason_rtx = GEN_INT (scale);
7278 if (!index)
7279 {
7280 reason = "scale without index";
7281 goto report_error;
7282 }
7283
7284 if (scale != 2 && scale != 4 && scale != 8)
7285 {
7286 reason = "scale is not a valid multiplier";
7287 goto report_error;
7288 }
7289 }
7290
7291 /* Validate displacement. */
7292 if (disp)
7293 {
7294 reason_rtx = disp;
7295
7296 if (GET_CODE (disp) == CONST
7297 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7298 switch (XINT (XEXP (disp, 0), 1))
7299 {
7300 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7301 used.  While the ABI also specifies 32bit relocations, we don't produce
7302 them at all and use IP-relative addressing instead. */
7303 case UNSPEC_GOT:
7304 case UNSPEC_GOTOFF:
7305 gcc_assert (flag_pic);
7306 if (!TARGET_64BIT)
7307 goto is_legitimate_pic;
7308 reason = "64bit address unspec";
7309 goto report_error;
7310
7311 case UNSPEC_GOTPCREL:
7312 gcc_assert (flag_pic);
7313 goto is_legitimate_pic;
7314
7315 case UNSPEC_GOTTPOFF:
7316 case UNSPEC_GOTNTPOFF:
7317 case UNSPEC_INDNTPOFF:
7318 case UNSPEC_NTPOFF:
7319 case UNSPEC_DTPOFF:
7320 break;
7321
7322 default:
7323 reason = "invalid address unspec";
7324 goto report_error;
7325 }
7326
7327 else if (SYMBOLIC_CONST (disp)
7328 && (flag_pic
7329 || (TARGET_MACHO
7330 #if TARGET_MACHO
7331 && MACHOPIC_INDIRECT
7332 && !machopic_operand_p (disp)
7333 #endif
7334 )))
7335 {
7336
7337 is_legitimate_pic:
7338 if (TARGET_64BIT && (index || base))
7339 {
7340 /* foo@dtpoff(%rX) is ok. */
7341 if (GET_CODE (disp) != CONST
7342 || GET_CODE (XEXP (disp, 0)) != PLUS
7343 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7344 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7345 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7346 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7347 {
7348 reason = "non-constant pic memory reference";
7349 goto report_error;
7350 }
7351 }
7352 else if (! legitimate_pic_address_disp_p (disp))
7353 {
7354 reason = "displacement is an invalid pic construct";
7355 goto report_error;
7356 }
7357
7358 /* This code used to verify that a symbolic pic displacement
7359 includes the pic_offset_table_rtx register.
7360
7361 While this is a good idea, unfortunately these constructs may
7362 be created by the "adds using lea" optimization for incorrect
7363 code like:
7364
7365 int a;
7366 int foo(int i)
7367 {
7368 return *(&a+i);
7369 }
7370
7371 This code is nonsensical, but results in addressing the
7372 GOT table with a pic_offset_table_rtx base.  We can't
7373 just refuse it easily, since it gets matched by the
7374 "addsi3" pattern, which later gets split to lea in case the
7375 output register differs from the input.  While this
7376 could be handled by a separate addsi pattern for this case
7377 that never results in lea, disabling this test seems to be
7378 the easier and correct fix for the crash. */
7379 }
7380 else if (GET_CODE (disp) != LABEL_REF
7381 && !CONST_INT_P (disp)
7382 && (GET_CODE (disp) != CONST
7383 || !legitimate_constant_p (disp))
7384 && (GET_CODE (disp) != SYMBOL_REF
7385 || !legitimate_constant_p (disp)))
7386 {
7387 reason = "displacement is not constant";
7388 goto report_error;
7389 }
7390 else if (TARGET_64BIT
7391 && !x86_64_immediate_operand (disp, VOIDmode))
7392 {
7393 reason = "displacement is out of range";
7394 goto report_error;
7395 }
7396 }
7397
7398 /* Everything looks valid. */
7399 return TRUE;
7400
7401 report_error:
7402 return FALSE;
7403 }
7404 \f
7405 /* Return a unique alias set for the GOT. */
7406
7407 static alias_set_type
7408 ix86_GOT_alias_set (void)
7409 {
7410 static alias_set_type set = -1;
7411 if (set == -1)
7412 set = new_alias_set ();
7413 return set;
7414 }
7415
7416 /* Return a legitimate reference for ORIG (an address) using the
7417 register REG. If REG is 0, a new pseudo is generated.
7418
7419 There are two types of references that must be handled:
7420
7421 1. Global data references must load the address from the GOT, via
7422 the PIC reg. An insn is emitted to do this load, and the reg is
7423 returned.
7424
7425 2. Static data references, constant pool addresses, and code labels
7426 compute the address as an offset from the GOT, whose base is in
7427 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7428 differentiate them from global data objects. The returned
7429 address is the PIC reg + an unspec constant.
7430
7431 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7432 reg also appears in the address. */
7433
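/* For example, following the two cases above, a reference to a global
   symbol "sym" is typically turned into a load such as
     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))
   i.e. sym@GOT(%ebx) on 32-bit targets, while a local symbol becomes
     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   i.e. sym@GOTOFF(%ebx); "sym" here is just an illustrative name.  */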
7434 static rtx
7435 legitimize_pic_address (rtx orig, rtx reg)
7436 {
7437 rtx addr = orig;
7438 rtx new_rtx = orig;
7439 rtx base;
7440
7441 #if TARGET_MACHO
7442 if (TARGET_MACHO && !TARGET_64BIT)
7443 {
7444 if (reg == 0)
7445 reg = gen_reg_rtx (Pmode);
7446 /* Use the generic Mach-O PIC machinery. */
7447 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7448 }
7449 #endif
7450
7451 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7452 new_rtx = addr;
7453 else if (TARGET_64BIT
7454 && ix86_cmodel != CM_SMALL_PIC
7455 && gotoff_operand (addr, Pmode))
7456 {
7457 rtx tmpreg;
7458 /* This symbol may be referenced via a displacement from the PIC
7459 base address (@GOTOFF). */
7460
7461 if (reload_in_progress)
7462 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7463 if (GET_CODE (addr) == CONST)
7464 addr = XEXP (addr, 0);
7465 if (GET_CODE (addr) == PLUS)
7466 {
7467 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7468 UNSPEC_GOTOFF);
7469 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7470 }
7471 else
7472 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7473 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7474 if (!reg)
7475 tmpreg = gen_reg_rtx (Pmode);
7476 else
7477 tmpreg = reg;
7478 emit_move_insn (tmpreg, new_rtx);
7479
7480 if (reg != 0)
7481 {
7482 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7483 tmpreg, 1, OPTAB_DIRECT);
7484 new_rtx = reg;
7485 }
7486 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7487 }
7488 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7489 {
7490 /* This symbol may be referenced via a displacement from the PIC
7491 base address (@GOTOFF). */
7492
7493 if (reload_in_progress)
7494 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7495 if (GET_CODE (addr) == CONST)
7496 addr = XEXP (addr, 0);
7497 if (GET_CODE (addr) == PLUS)
7498 {
7499 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7500 UNSPEC_GOTOFF);
7501 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7502 }
7503 else
7504 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7505 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7506 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7507
7508 if (reg != 0)
7509 {
7510 emit_move_insn (reg, new_rtx);
7511 new_rtx = reg;
7512 }
7513 }
7514 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7515 /* We can't use @GOTOFF for text labels on VxWorks;
7516 see gotoff_operand. */
7517 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7518 {
7519 /* Given that we've already handled dllimport variables separately
7520 in legitimize_address, and all other variables should satisfy
7521 legitimate_pic_address_disp_p, we should never arrive here. */
7522 gcc_assert (!TARGET_64BIT_MS_ABI);
7523
7524 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7525 {
7526 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7527 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7528 new_rtx = gen_const_mem (Pmode, new_rtx);
7529 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7530
7531 if (reg == 0)
7532 reg = gen_reg_rtx (Pmode);
7533 /* Use gen_movsi directly, otherwise the address is loaded
7534 into a register for CSE.  We don't want to CSE these addresses;
7535 instead we CSE addresses from the GOT table, so skip this. */
7536 emit_insn (gen_movsi (reg, new_rtx));
7537 new_rtx = reg;
7538 }
7539 else
7540 {
7541 /* This symbol must be referenced via a load from the
7542 Global Offset Table (@GOT). */
7543
7544 if (reload_in_progress)
7545 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7546 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7547 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7548 if (TARGET_64BIT)
7549 new_rtx = force_reg (Pmode, new_rtx);
7550 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7551 new_rtx = gen_const_mem (Pmode, new_rtx);
7552 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7553
7554 if (reg == 0)
7555 reg = gen_reg_rtx (Pmode);
7556 emit_move_insn (reg, new_rtx);
7557 new_rtx = reg;
7558 }
7559 }
7560 else
7561 {
7562 if (CONST_INT_P (addr)
7563 && !x86_64_immediate_operand (addr, VOIDmode))
7564 {
7565 if (reg)
7566 {
7567 emit_move_insn (reg, addr);
7568 new_rtx = reg;
7569 }
7570 else
7571 new_rtx = force_reg (Pmode, addr);
7572 }
7573 else if (GET_CODE (addr) == CONST)
7574 {
7575 addr = XEXP (addr, 0);
7576
7577 /* We must match stuff we generated before.  Assume the only
7578 unspecs that can get here are ours.  Not that we could do
7579 anything with them anyway... */
7580 if (GET_CODE (addr) == UNSPEC
7581 || (GET_CODE (addr) == PLUS
7582 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7583 return orig;
7584 gcc_assert (GET_CODE (addr) == PLUS);
7585 }
7586 if (GET_CODE (addr) == PLUS)
7587 {
7588 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7589
7590 /* Check first to see if this is a constant offset from a @GOTOFF
7591 symbol reference. */
7592 if (gotoff_operand (op0, Pmode)
7593 && CONST_INT_P (op1))
7594 {
7595 if (!TARGET_64BIT)
7596 {
7597 if (reload_in_progress)
7598 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7599 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7600 UNSPEC_GOTOFF);
7601 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7602 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7603 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7604
7605 if (reg != 0)
7606 {
7607 emit_move_insn (reg, new_rtx);
7608 new_rtx = reg;
7609 }
7610 }
7611 else
7612 {
7613 if (INTVAL (op1) < -16*1024*1024
7614 || INTVAL (op1) >= 16*1024*1024)
7615 {
7616 if (!x86_64_immediate_operand (op1, Pmode))
7617 op1 = force_reg (Pmode, op1);
7618 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7619 }
7620 }
7621 }
7622 else
7623 {
7624 base = legitimize_pic_address (XEXP (addr, 0), reg);
7625 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7626 base == reg ? NULL_RTX : reg);
7627
7628 if (CONST_INT_P (new_rtx))
7629 new_rtx = plus_constant (base, INTVAL (new_rtx));
7630 else
7631 {
7632 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7633 {
7634 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7635 new_rtx = XEXP (new_rtx, 1);
7636 }
7637 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7638 }
7639 }
7640 }
7641 }
7642 return new_rtx;
7643 }
7644 \f
7645 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7646
7647 static rtx
7648 get_thread_pointer (int to_reg)
7649 {
7650 rtx tp, reg, insn;
7651
7652 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7653 if (!to_reg)
7654 return tp;
7655
7656 reg = gen_reg_rtx (Pmode);
7657 insn = gen_rtx_SET (VOIDmode, reg, tp);
7658 insn = emit_insn (insn);
7659
7660 return reg;
7661 }
7662
7663 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7664 false if we expect this to be used for a memory address and true if
7665 we expect to load the address into a register. */
7666
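/* Roughly speaking, the TLS models handled below correspond to the usual
   code sequences: global and local dynamic end up calling __tls_get_addr
   (or use the GNU2 TLS descriptor scheme when TARGET_GNU2_TLS), while
   initial and local exec add a @GOTTPOFF/@NTPOFF/@TPOFF style offset to
   the thread pointer, as the individual cases spell out.  */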
7667 static rtx
7668 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7669 {
7670 rtx dest, base, off, pic, tp;
7671 int type;
7672
7673 switch (model)
7674 {
7675 case TLS_MODEL_GLOBAL_DYNAMIC:
7676 dest = gen_reg_rtx (Pmode);
7677 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7678
7679 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7680 {
7681 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7682
7683 start_sequence ();
7684 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7685 insns = get_insns ();
7686 end_sequence ();
7687
7688 CONST_OR_PURE_CALL_P (insns) = 1;
7689 emit_libcall_block (insns, dest, rax, x);
7690 }
7691 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7692 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7693 else
7694 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7695
7696 if (TARGET_GNU2_TLS)
7697 {
7698 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7699
7700 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7701 }
7702 break;
7703
7704 case TLS_MODEL_LOCAL_DYNAMIC:
7705 base = gen_reg_rtx (Pmode);
7706 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7707
7708 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7709 {
7710 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7711
7712 start_sequence ();
7713 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7714 insns = get_insns ();
7715 end_sequence ();
7716
7717 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7718 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7719 CONST_OR_PURE_CALL_P (insns) = 1;
7720 emit_libcall_block (insns, base, rax, note);
7721 }
7722 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7723 emit_insn (gen_tls_local_dynamic_base_64 (base));
7724 else
7725 emit_insn (gen_tls_local_dynamic_base_32 (base));
7726
7727 if (TARGET_GNU2_TLS)
7728 {
7729 rtx x = ix86_tls_module_base ();
7730
7731 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7732 gen_rtx_MINUS (Pmode, x, tp));
7733 }
7734
7735 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7736 off = gen_rtx_CONST (Pmode, off);
7737
7738 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7739
7740 if (TARGET_GNU2_TLS)
7741 {
7742 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7743
7744 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7745 }
7746
7747 break;
7748
7749 case TLS_MODEL_INITIAL_EXEC:
7750 if (TARGET_64BIT)
7751 {
7752 pic = NULL;
7753 type = UNSPEC_GOTNTPOFF;
7754 }
7755 else if (flag_pic)
7756 {
7757 if (reload_in_progress)
7758 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7759 pic = pic_offset_table_rtx;
7760 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7761 }
7762 else if (!TARGET_ANY_GNU_TLS)
7763 {
7764 pic = gen_reg_rtx (Pmode);
7765 emit_insn (gen_set_got (pic));
7766 type = UNSPEC_GOTTPOFF;
7767 }
7768 else
7769 {
7770 pic = NULL;
7771 type = UNSPEC_INDNTPOFF;
7772 }
7773
7774 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7775 off = gen_rtx_CONST (Pmode, off);
7776 if (pic)
7777 off = gen_rtx_PLUS (Pmode, pic, off);
7778 off = gen_const_mem (Pmode, off);
7779 set_mem_alias_set (off, ix86_GOT_alias_set ());
7780
7781 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7782 {
7783 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7784 off = force_reg (Pmode, off);
7785 return gen_rtx_PLUS (Pmode, base, off);
7786 }
7787 else
7788 {
7789 base = get_thread_pointer (true);
7790 dest = gen_reg_rtx (Pmode);
7791 emit_insn (gen_subsi3 (dest, base, off));
7792 }
7793 break;
7794
7795 case TLS_MODEL_LOCAL_EXEC:
7796 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7797 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7798 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7799 off = gen_rtx_CONST (Pmode, off);
7800
7801 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7802 {
7803 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7804 return gen_rtx_PLUS (Pmode, base, off);
7805 }
7806 else
7807 {
7808 base = get_thread_pointer (true);
7809 dest = gen_reg_rtx (Pmode);
7810 emit_insn (gen_subsi3 (dest, base, off));
7811 }
7812 break;
7813
7814 default:
7815 gcc_unreachable ();
7816 }
7817
7818 return dest;
7819 }
7820
7821 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7822 to symbol DECL. */
7823
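/* For example, a dllimport declaration of "foo" gets an artificial
   VAR_DECL whose RTL is a load from the symbol "*__imp__foo"
   ("*__imp_foo" for fastcall-prefixed names), i.e. the import-table
   slot that the linker fills in; "foo" is just an illustrative name.  */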
7824 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7825 htab_t dllimport_map;
7826
7827 static tree
7828 get_dllimport_decl (tree decl)
7829 {
7830 struct tree_map *h, in;
7831 void **loc;
7832 const char *name;
7833 const char *prefix;
7834 size_t namelen, prefixlen;
7835 char *imp_name;
7836 tree to;
7837 rtx rtl;
7838
7839 if (!dllimport_map)
7840 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7841
7842 in.hash = htab_hash_pointer (decl);
7843 in.base.from = decl;
7844 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7845 h = (struct tree_map *) *loc;
7846 if (h)
7847 return h->to;
7848
7849 *loc = h = GGC_NEW (struct tree_map);
7850 h->hash = in.hash;
7851 h->base.from = decl;
7852 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7853 DECL_ARTIFICIAL (to) = 1;
7854 DECL_IGNORED_P (to) = 1;
7855 DECL_EXTERNAL (to) = 1;
7856 TREE_READONLY (to) = 1;
7857
7858 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7859 name = targetm.strip_name_encoding (name);
7860 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7861 namelen = strlen (name);
7862 prefixlen = strlen (prefix);
7863 imp_name = (char *) alloca (namelen + prefixlen + 1);
7864 memcpy (imp_name, prefix, prefixlen);
7865 memcpy (imp_name + prefixlen, name, namelen + 1);
7866
7867 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7868 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7869 SET_SYMBOL_REF_DECL (rtl, to);
7870 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7871
7872 rtl = gen_const_mem (Pmode, rtl);
7873 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7874
7875 SET_DECL_RTL (to, rtl);
7876 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7877
7878 return to;
7879 }
7880
7881 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7882 true if we require the result be a register. */
7883
7884 static rtx
7885 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7886 {
7887 tree imp_decl;
7888 rtx x;
7889
7890 gcc_assert (SYMBOL_REF_DECL (symbol));
7891 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7892
7893 x = DECL_RTL (imp_decl);
7894 if (want_reg)
7895 x = force_reg (Pmode, x);
7896 return x;
7897 }
7898
7899 /* Try machine-dependent ways of modifying an illegitimate address
7900 to be legitimate. If we find one, return the new, valid address.
7901 This macro is used in only one place: `memory_address' in explow.c.
7902
7903 OLDX is the address as it was before break_out_memory_refs was called.
7904 In some cases it is useful to look at this to decide what needs to be done.
7905
7906 MODE and WIN are passed so that this macro can use
7907 GO_IF_LEGITIMATE_ADDRESS.
7908
7909 It is always safe for this macro to do nothing. It exists to recognize
7910 opportunities to optimize the output.
7911
7912 For the 80386, we handle X+REG by loading X into a register R and
7913 using R+REG. R will go in a general reg and indexing will be used.
7914 However, if REG is a broken-out memory address or multiplication,
7915 nothing needs to be done because REG can certainly go in a general reg.
7916
7917 When -fpic is used, special handling is needed for symbolic references.
7918 See comments by legitimize_pic_address in i386.c for details. */
7919
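/* As a small illustration of the canonicalizations below, an address like
     (plus (ashift (reg) (const_int 2)) (reg))
   is rewritten into
     (plus (mult (reg) (const_int 4)) (reg))
   (with the shifted operand forced into a register), which matches the
   scaled-index form that legitimate_address_p accepts.  */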
7920 rtx
7921 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7922 {
7923 int changed = 0;
7924 unsigned log;
7925
7926 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7927 if (log)
7928 return legitimize_tls_address (x, (enum tls_model) log, false);
7929 if (GET_CODE (x) == CONST
7930 && GET_CODE (XEXP (x, 0)) == PLUS
7931 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7932 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7933 {
7934 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7935 (enum tls_model) log, false);
7936 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7937 }
7938
7939 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7940 {
7941 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7942 return legitimize_dllimport_symbol (x, true);
7943 if (GET_CODE (x) == CONST
7944 && GET_CODE (XEXP (x, 0)) == PLUS
7945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7946 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7947 {
7948 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7949 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7950 }
7951 }
7952
7953 if (flag_pic && SYMBOLIC_CONST (x))
7954 return legitimize_pic_address (x, 0);
7955
7956 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7957 if (GET_CODE (x) == ASHIFT
7958 && CONST_INT_P (XEXP (x, 1))
7959 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7960 {
7961 changed = 1;
7962 log = INTVAL (XEXP (x, 1));
7963 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7964 GEN_INT (1 << log));
7965 }
7966
7967 if (GET_CODE (x) == PLUS)
7968 {
7969 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7970
7971 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7972 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7973 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7974 {
7975 changed = 1;
7976 log = INTVAL (XEXP (XEXP (x, 0), 1));
7977 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7978 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7979 GEN_INT (1 << log));
7980 }
7981
7982 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7983 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7984 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7985 {
7986 changed = 1;
7987 log = INTVAL (XEXP (XEXP (x, 1), 1));
7988 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7989 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7990 GEN_INT (1 << log));
7991 }
7992
7993 /* Put multiply first if it isn't already. */
7994 if (GET_CODE (XEXP (x, 1)) == MULT)
7995 {
7996 rtx tmp = XEXP (x, 0);
7997 XEXP (x, 0) = XEXP (x, 1);
7998 XEXP (x, 1) = tmp;
7999 changed = 1;
8000 }
8001
8002 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8003 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8004 created by virtual register instantiation, register elimination, and
8005 similar optimizations. */
8006 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8007 {
8008 changed = 1;
8009 x = gen_rtx_PLUS (Pmode,
8010 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8011 XEXP (XEXP (x, 1), 0)),
8012 XEXP (XEXP (x, 1), 1));
8013 }
8014
8015 /* Canonicalize
8016 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8017 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8018 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8019 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8020 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8021 && CONSTANT_P (XEXP (x, 1)))
8022 {
8023 rtx constant;
8024 rtx other = NULL_RTX;
8025
8026 if (CONST_INT_P (XEXP (x, 1)))
8027 {
8028 constant = XEXP (x, 1);
8029 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8030 }
8031 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8032 {
8033 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8034 other = XEXP (x, 1);
8035 }
8036 else
8037 constant = 0;
8038
8039 if (constant)
8040 {
8041 changed = 1;
8042 x = gen_rtx_PLUS (Pmode,
8043 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8044 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8045 plus_constant (other, INTVAL (constant)));
8046 }
8047 }
8048
8049 if (changed && legitimate_address_p (mode, x, FALSE))
8050 return x;
8051
8052 if (GET_CODE (XEXP (x, 0)) == MULT)
8053 {
8054 changed = 1;
8055 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8056 }
8057
8058 if (GET_CODE (XEXP (x, 1)) == MULT)
8059 {
8060 changed = 1;
8061 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8062 }
8063
8064 if (changed
8065 && REG_P (XEXP (x, 1))
8066 && REG_P (XEXP (x, 0)))
8067 return x;
8068
8069 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8070 {
8071 changed = 1;
8072 x = legitimize_pic_address (x, 0);
8073 }
8074
8075 if (changed && legitimate_address_p (mode, x, FALSE))
8076 return x;
8077
8078 if (REG_P (XEXP (x, 0)))
8079 {
8080 rtx temp = gen_reg_rtx (Pmode);
8081 rtx val = force_operand (XEXP (x, 1), temp);
8082 if (val != temp)
8083 emit_move_insn (temp, val);
8084
8085 XEXP (x, 1) = temp;
8086 return x;
8087 }
8088
8089 else if (REG_P (XEXP (x, 1)))
8090 {
8091 rtx temp = gen_reg_rtx (Pmode);
8092 rtx val = force_operand (XEXP (x, 0), temp);
8093 if (val != temp)
8094 emit_move_insn (temp, val);
8095
8096 XEXP (x, 0) = temp;
8097 return x;
8098 }
8099 }
8100
8101 return x;
8102 }
8103 \f
8104 /* Print an integer constant expression in assembler syntax. Addition
8105 and subtraction are the only arithmetic that may appear in these
8106 expressions. FILE is the stdio stream to write to, X is the rtx, and
8107 CODE is the operand print code from the output string. */
8108
8109 static void
8110 output_pic_addr_const (FILE *file, rtx x, int code)
8111 {
8112 char buf[256];
8113
8114 switch (GET_CODE (x))
8115 {
8116 case PC:
8117 gcc_assert (flag_pic);
8118 putc ('.', file);
8119 break;
8120
8121 case SYMBOL_REF:
8122 if (! TARGET_MACHO || TARGET_64BIT)
8123 output_addr_const (file, x);
8124 else
8125 {
8126 const char *name = XSTR (x, 0);
8127
8128 /* Mark the decl as referenced so that cgraph will
8129 output the function. */
8130 if (SYMBOL_REF_DECL (x))
8131 mark_decl_referenced (SYMBOL_REF_DECL (x));
8132
8133 #if TARGET_MACHO
8134 if (MACHOPIC_INDIRECT
8135 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8136 name = machopic_indirection_name (x, /*stub_p=*/true);
8137 #endif
8138 assemble_name (file, name);
8139 }
8140 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8141 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8142 fputs ("@PLT", file);
8143 break;
8144
8145 case LABEL_REF:
8146 x = XEXP (x, 0);
8147 /* FALLTHRU */
8148 case CODE_LABEL:
8149 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8150 assemble_name (asm_out_file, buf);
8151 break;
8152
8153 case CONST_INT:
8154 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8155 break;
8156
8157 case CONST:
8158 /* This used to output parentheses around the expression,
8159 but that does not work on the 386 (either ATT or BSD assembler). */
8160 output_pic_addr_const (file, XEXP (x, 0), code);
8161 break;
8162
8163 case CONST_DOUBLE:
8164 if (GET_MODE (x) == VOIDmode)
8165 {
8166 /* We can use %d if the number is <32 bits and positive. */
8167 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8168 fprintf (file, "0x%lx%08lx",
8169 (unsigned long) CONST_DOUBLE_HIGH (x),
8170 (unsigned long) CONST_DOUBLE_LOW (x));
8171 else
8172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8173 }
8174 else
8175 /* We can't handle floating point constants;
8176 PRINT_OPERAND must handle them. */
8177 output_operand_lossage ("floating constant misused");
8178 break;
8179
8180 case PLUS:
8181 /* Some assemblers need integer constants to appear first. */
8182 if (CONST_INT_P (XEXP (x, 0)))
8183 {
8184 output_pic_addr_const (file, XEXP (x, 0), code);
8185 putc ('+', file);
8186 output_pic_addr_const (file, XEXP (x, 1), code);
8187 }
8188 else
8189 {
8190 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8191 output_pic_addr_const (file, XEXP (x, 1), code);
8192 putc ('+', file);
8193 output_pic_addr_const (file, XEXP (x, 0), code);
8194 }
8195 break;
8196
8197 case MINUS:
8198 if (!TARGET_MACHO)
8199 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8200 output_pic_addr_const (file, XEXP (x, 0), code);
8201 putc ('-', file);
8202 output_pic_addr_const (file, XEXP (x, 1), code);
8203 if (!TARGET_MACHO)
8204 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8205 break;
8206
8207 case UNSPEC:
8208 gcc_assert (XVECLEN (x, 0) == 1);
8209 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8210 switch (XINT (x, 1))
8211 {
8212 case UNSPEC_GOT:
8213 fputs ("@GOT", file);
8214 break;
8215 case UNSPEC_GOTOFF:
8216 fputs ("@GOTOFF", file);
8217 break;
8218 case UNSPEC_PLTOFF:
8219 fputs ("@PLTOFF", file);
8220 break;
8221 case UNSPEC_GOTPCREL:
8222 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8223 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8224 break;
8225 case UNSPEC_GOTTPOFF:
8226 /* FIXME: This might be @TPOFF in Sun ld too. */
8227 fputs ("@GOTTPOFF", file);
8228 break;
8229 case UNSPEC_TPOFF:
8230 fputs ("@TPOFF", file);
8231 break;
8232 case UNSPEC_NTPOFF:
8233 if (TARGET_64BIT)
8234 fputs ("@TPOFF", file);
8235 else
8236 fputs ("@NTPOFF", file);
8237 break;
8238 case UNSPEC_DTPOFF:
8239 fputs ("@DTPOFF", file);
8240 break;
8241 case UNSPEC_GOTNTPOFF:
8242 if (TARGET_64BIT)
8243 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8244 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8245 else
8246 fputs ("@GOTNTPOFF", file);
8247 break;
8248 case UNSPEC_INDNTPOFF:
8249 fputs ("@INDNTPOFF", file);
8250 break;
8251 default:
8252 output_operand_lossage ("invalid UNSPEC as operand");
8253 break;
8254 }
8255 break;
8256
8257 default:
8258 output_operand_lossage ("invalid expression as operand");
8259 }
8260 }
8261
8262 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8263 We need to emit DTP-relative relocations. */
8264
8265 static void ATTRIBUTE_UNUSED
8266 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8267 {
8268 fputs (ASM_LONG, file);
8269 output_addr_const (file, x);
8270 fputs ("@DTPOFF", file);
8271 switch (size)
8272 {
8273 case 4:
8274 break;
8275 case 8:
8276 fputs (", 0", file);
8277 break;
8278 default:
8279 gcc_unreachable ();
8280 }
8281 }
8282
8283 /* In the name of slightly smaller debug output, and to cater to
8284 general assembler lossage, recognize PIC+GOTOFF and turn it back
8285 into a direct symbol reference.
8286
8287 On Darwin, this is necessary to avoid a crash, because Darwin
8288 has a different PIC label for each routine but the DWARF debugging
8289 information is not associated with any particular routine, so it's
8290 necessary to remove references to the PIC label from RTL stored by
8291 the DWARF output code. */
8292
8293 static rtx
8294 ix86_delegitimize_address (rtx orig_x)
8295 {
8296 rtx x = orig_x;
8297 /* reg_addend is NULL or a multiple of some register. */
8298 rtx reg_addend = NULL_RTX;
8299 /* const_addend is NULL or a const_int. */
8300 rtx const_addend = NULL_RTX;
8301 /* This is the result, or NULL. */
8302 rtx result = NULL_RTX;
8303
8304 if (MEM_P (x))
8305 x = XEXP (x, 0);
8306
8307 if (TARGET_64BIT)
8308 {
8309 if (GET_CODE (x) != CONST
8310 || GET_CODE (XEXP (x, 0)) != UNSPEC
8311 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8312 || !MEM_P (orig_x))
8313 return orig_x;
8314 return XVECEXP (XEXP (x, 0), 0, 0);
8315 }
8316
8317 if (GET_CODE (x) != PLUS
8318 || GET_CODE (XEXP (x, 1)) != CONST)
8319 return orig_x;
8320
8321 if (REG_P (XEXP (x, 0))
8322 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8323 /* %ebx + GOT/GOTOFF */
8324 ;
8325 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8326 {
8327 /* %ebx + %reg * scale + GOT/GOTOFF */
8328 reg_addend = XEXP (x, 0);
8329 if (REG_P (XEXP (reg_addend, 0))
8330 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8331 reg_addend = XEXP (reg_addend, 1);
8332 else if (REG_P (XEXP (reg_addend, 1))
8333 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8334 reg_addend = XEXP (reg_addend, 0);
8335 else
8336 return orig_x;
8337 if (!REG_P (reg_addend)
8338 && GET_CODE (reg_addend) != MULT
8339 && GET_CODE (reg_addend) != ASHIFT)
8340 return orig_x;
8341 }
8342 else
8343 return orig_x;
8344
8345 x = XEXP (XEXP (x, 1), 0);
8346 if (GET_CODE (x) == PLUS
8347 && CONST_INT_P (XEXP (x, 1)))
8348 {
8349 const_addend = XEXP (x, 1);
8350 x = XEXP (x, 0);
8351 }
8352
8353 if (GET_CODE (x) == UNSPEC
8354 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8355 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8356 result = XVECEXP (x, 0, 0);
8357
8358 if (TARGET_MACHO && darwin_local_data_pic (x)
8359 && !MEM_P (orig_x))
8360 result = XEXP (x, 0);
8361
8362 if (! result)
8363 return orig_x;
8364
8365 if (const_addend)
8366 result = gen_rtx_PLUS (Pmode, result, const_addend);
8367 if (reg_addend)
8368 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8369 return result;
8370 }
8371
8372 /* If X is a machine specific address (i.e. a symbol or label being
8373 referenced as a displacement from the GOT implemented using an
8374 UNSPEC), then return the base term. Otherwise return X. */
8375
8376 rtx
8377 ix86_find_base_term (rtx x)
8378 {
8379 rtx term;
8380
8381 if (TARGET_64BIT)
8382 {
8383 if (GET_CODE (x) != CONST)
8384 return x;
8385 term = XEXP (x, 0);
8386 if (GET_CODE (term) == PLUS
8387 && (CONST_INT_P (XEXP (term, 1))
8388 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8389 term = XEXP (term, 0);
8390 if (GET_CODE (term) != UNSPEC
8391 || XINT (term, 1) != UNSPEC_GOTPCREL)
8392 return x;
8393
8394 term = XVECEXP (term, 0, 0);
8395
8396 if (GET_CODE (term) != SYMBOL_REF
8397 && GET_CODE (term) != LABEL_REF)
8398 return x;
8399
8400 return term;
8401 }
8402
8403 term = ix86_delegitimize_address (x);
8404
8405 if (GET_CODE (term) != SYMBOL_REF
8406 && GET_CODE (term) != LABEL_REF)
8407 return x;
8408
8409 return term;
8410 }
8411 \f
8412 static void
8413 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8414 int fp, FILE *file)
8415 {
8416 const char *suffix;
8417
8418 if (mode == CCFPmode || mode == CCFPUmode)
8419 {
8420 enum rtx_code second_code, bypass_code;
8421 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8422 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8423 code = ix86_fp_compare_code_to_integer (code);
8424 mode = CCmode;
8425 }
8426 if (reverse)
8427 code = reverse_condition (code);
8428
8429 switch (code)
8430 {
8431 case EQ:
8432 switch (mode)
8433 {
8434 case CCAmode:
8435 suffix = "a";
8436 break;
8437
8438 case CCCmode:
8439 suffix = "c";
8440 break;
8441
8442 case CCOmode:
8443 suffix = "o";
8444 break;
8445
8446 case CCSmode:
8447 suffix = "s";
8448 break;
8449
8450 default:
8451 suffix = "e";
8452 }
8453 break;
8454 case NE:
8455 switch (mode)
8456 {
8457 case CCAmode:
8458 suffix = "na";
8459 break;
8460
8461 case CCCmode:
8462 suffix = "nc";
8463 break;
8464
8465 case CCOmode:
8466 suffix = "no";
8467 break;
8468
8469 case CCSmode:
8470 suffix = "ns";
8471 break;
8472
8473 default:
8474 suffix = "ne";
8475 }
8476 break;
8477 case GT:
8478 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8479 suffix = "g";
8480 break;
8481 case GTU:
8482 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8483 Those same assemblers have the same but opposite lossage on cmov. */
8484 if (mode == CCmode)
8485 suffix = fp ? "nbe" : "a";
8486 else if (mode == CCCmode)
8487 suffix = "b";
8488 else
8489 gcc_unreachable ();
8490 break;
8491 case LT:
8492 switch (mode)
8493 {
8494 case CCNOmode:
8495 case CCGOCmode:
8496 suffix = "s";
8497 break;
8498
8499 case CCmode:
8500 case CCGCmode:
8501 suffix = "l";
8502 break;
8503
8504 default:
8505 gcc_unreachable ();
8506 }
8507 break;
8508 case LTU:
8509 gcc_assert (mode == CCmode || mode == CCCmode);
8510 suffix = "b";
8511 break;
8512 case GE:
8513 switch (mode)
8514 {
8515 case CCNOmode:
8516 case CCGOCmode:
8517 suffix = "ns";
8518 break;
8519
8520 case CCmode:
8521 case CCGCmode:
8522 suffix = "ge";
8523 break;
8524
8525 default:
8526 gcc_unreachable ();
8527 }
8528 break;
8529 case GEU:
8530 /* ??? As above. */
8531 gcc_assert (mode == CCmode || mode == CCCmode);
8532 suffix = fp ? "nb" : "ae";
8533 break;
8534 case LE:
8535 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8536 suffix = "le";
8537 break;
8538 case LEU:
8539 /* ??? As above. */
8540 if (mode == CCmode)
8541 suffix = "be";
8542 else if (mode == CCCmode)
8543 suffix = fp ? "nb" : "ae";
8544 else
8545 gcc_unreachable ();
8546 break;
8547 case UNORDERED:
8548 suffix = fp ? "u" : "p";
8549 break;
8550 case ORDERED:
8551 suffix = fp ? "nu" : "np";
8552 break;
8553 default:
8554 gcc_unreachable ();
8555 }
8556 fputs (suffix, file);
8557 }
8558
8559 /* Print the name of register X to FILE based on its machine mode and number.
8560 If CODE is 'w', pretend the mode is HImode.
8561 If CODE is 'b', pretend the mode is QImode.
8562 If CODE is 'k', pretend the mode is SImode.
8563 If CODE is 'q', pretend the mode is DImode.
8564 If CODE is 'h', pretend the reg is the 'high' byte register.
8565 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
8566
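/* For instance, with the AT&T dialect and a register operand numbered 0
   (the "ax" register), code 'b' prints %al, 'h' prints %ah, 'w' prints
   %ax, 'k' prints %eax and 'q' prints %rax (the last only on 64-bit
   targets).  */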
8567 void
8568 print_reg (rtx x, int code, FILE *file)
8569 {
8570 gcc_assert (x == pc_rtx
8571 || (REGNO (x) != ARG_POINTER_REGNUM
8572 && REGNO (x) != FRAME_POINTER_REGNUM
8573 && REGNO (x) != FLAGS_REG
8574 && REGNO (x) != FPSR_REG
8575 && REGNO (x) != FPCR_REG));
8576
8577 if (ASSEMBLER_DIALECT == ASM_ATT)
8578 putc ('%', file);
8579
8580 if (x == pc_rtx)
8581 {
8582 gcc_assert (TARGET_64BIT);
8583 fputs ("rip", file);
8584 return;
8585 }
8586
8587 if (code == 'w' || MMX_REG_P (x))
8588 code = 2;
8589 else if (code == 'b')
8590 code = 1;
8591 else if (code == 'k')
8592 code = 4;
8593 else if (code == 'q')
8594 code = 8;
8595 else if (code == 'y')
8596 code = 3;
8597 else if (code == 'h')
8598 code = 0;
8599 else
8600 code = GET_MODE_SIZE (GET_MODE (x));
8601
8602 /* Irritatingly, AMD extended registers use a different naming convention
8603 from the normal registers. */
8604 if (REX_INT_REG_P (x))
8605 {
8606 gcc_assert (TARGET_64BIT);
8607 switch (code)
8608 {
8609 case 0:
8610 error ("extended registers have no high halves");
8611 break;
8612 case 1:
8613 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8614 break;
8615 case 2:
8616 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8617 break;
8618 case 4:
8619 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8620 break;
8621 case 8:
8622 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8623 break;
8624 default:
8625 error ("unsupported operand size for extended register");
8626 break;
8627 }
8628 return;
8629 }
8630 switch (code)
8631 {
8632 case 3:
8633 if (STACK_TOP_P (x))
8634 {
8635 fputs ("st(0)", file);
8636 break;
8637 }
8638 /* FALLTHRU */
8639 case 8:
8640 case 4:
8641 case 12:
8642 if (! ANY_FP_REG_P (x))
8643 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8644 /* FALLTHRU */
8645 case 16:
8646 case 2:
8647 normal:
8648 fputs (hi_reg_name[REGNO (x)], file);
8649 break;
8650 case 1:
8651 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8652 goto normal;
8653 fputs (qi_reg_name[REGNO (x)], file);
8654 break;
8655 case 0:
8656 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8657 goto normal;
8658 fputs (qi_high_reg_name[REGNO (x)], file);
8659 break;
8660 default:
8661 gcc_unreachable ();
8662 }
8663 }
8664
8665 /* Locate some local-dynamic symbol still in use by this function
8666 so that we can print its name in some tls_local_dynamic_base
8667 pattern. */
8668
8669 static int
8670 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8671 {
8672 rtx x = *px;
8673
8674 if (GET_CODE (x) == SYMBOL_REF
8675 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8676 {
8677 cfun->machine->some_ld_name = XSTR (x, 0);
8678 return 1;
8679 }
8680
8681 return 0;
8682 }
8683
8684 static const char *
8685 get_some_local_dynamic_name (void)
8686 {
8687 rtx insn;
8688
8689 if (cfun->machine->some_ld_name)
8690 return cfun->machine->some_ld_name;
8691
8692 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8693 if (INSN_P (insn)
8694 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8695 return cfun->machine->some_ld_name;
8696
8697 gcc_unreachable ();
8698 }
8699
8700 /* Meaning of CODE:
8701 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8702 C -- print opcode suffix for set/cmov insn.
8703 c -- like C, but print reversed condition
8704 F,f -- likewise, but for floating-point.
8705 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8706 otherwise nothing
8707 R -- print the prefix for register names.
8708 z -- print the opcode suffix for the size of the current operand.
8709 * -- print a star (in certain assembler syntax)
8710 A -- print an absolute memory reference.
8711 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8712 s -- print a shift double count, followed by the assembler's argument
8713 delimiter.
8714 b -- print the QImode name of the register for the indicated operand.
8715 %b0 would print %al if operands[0] is reg 0.
8716 w -- likewise, print the HImode name of the register.
8717 k -- likewise, print the SImode name of the register.
8718 q -- likewise, print the DImode name of the register.
8719 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8720 y -- print "st(0)" instead of "st" as a register.
8721 D -- print condition for SSE cmp instruction.
8722 P -- if PIC, print an @PLT suffix.
8723 X -- don't print any sort of PIC '@' suffix for a symbol.
8724 & -- print some in-use local-dynamic symbol name.
8725 H -- print a memory address offset by 8; used for sse high-parts
8726 Y -- print condition for SSE5 com* instruction.
8727 + -- print a branch hint as 'cs' or 'ds' prefix
8728 ; -- print a semicolon (after prefixes due to bug in older gas).
8729 */
8730
8731 void
8732 print_operand (FILE *file, rtx x, int code)
8733 {
8734 if (code)
8735 {
8736 switch (code)
8737 {
8738 case '*':
8739 if (ASSEMBLER_DIALECT == ASM_ATT)
8740 putc ('*', file);
8741 return;
8742
8743 case '&':
8744 assemble_name (file, get_some_local_dynamic_name ());
8745 return;
8746
8747 case 'A':
8748 switch (ASSEMBLER_DIALECT)
8749 {
8750 case ASM_ATT:
8751 putc ('*', file);
8752 break;
8753
8754 case ASM_INTEL:
8755 /* Intel syntax.  For absolute addresses, registers should not
8756 be surrounded by brackets. */
8757 if (!REG_P (x))
8758 {
8759 putc ('[', file);
8760 PRINT_OPERAND (file, x, 0);
8761 putc (']', file);
8762 return;
8763 }
8764 break;
8765
8766 default:
8767 gcc_unreachable ();
8768 }
8769
8770 PRINT_OPERAND (file, x, 0);
8771 return;
8772
8773
8774 case 'L':
8775 if (ASSEMBLER_DIALECT == ASM_ATT)
8776 putc ('l', file);
8777 return;
8778
8779 case 'W':
8780 if (ASSEMBLER_DIALECT == ASM_ATT)
8781 putc ('w', file);
8782 return;
8783
8784 case 'B':
8785 if (ASSEMBLER_DIALECT == ASM_ATT)
8786 putc ('b', file);
8787 return;
8788
8789 case 'Q':
8790 if (ASSEMBLER_DIALECT == ASM_ATT)
8791 putc ('l', file);
8792 return;
8793
8794 case 'S':
8795 if (ASSEMBLER_DIALECT == ASM_ATT)
8796 putc ('s', file);
8797 return;
8798
8799 case 'T':
8800 if (ASSEMBLER_DIALECT == ASM_ATT)
8801 putc ('t', file);
8802 return;
8803
8804 case 'z':
8805 /* 387 opcodes don't get size suffixes if the operands are
8806 registers. */
8807 if (STACK_REG_P (x))
8808 return;
8809
8810 /* Likewise if using Intel opcodes. */
8811 if (ASSEMBLER_DIALECT == ASM_INTEL)
8812 return;
8813
8814 /* The opcode suffix is derived from the size of the operand. */
8815 switch (GET_MODE_SIZE (GET_MODE (x)))
8816 {
8817 case 1:
8818 putc ('b', file);
8819 return;
8820
8821 case 2:
8822 if (MEM_P (x))
8823 {
8824 #ifdef HAVE_GAS_FILDS_FISTS
8825 putc ('s', file);
8826 #endif
8827 return;
8828 }
8829 else
8830 putc ('w', file);
8831 return;
8832
8833 case 4:
8834 if (GET_MODE (x) == SFmode)
8835 {
8836 putc ('s', file);
8837 return;
8838 }
8839 else
8840 putc ('l', file);
8841 return;
8842
8843 case 12:
8844 case 16:
8845 putc ('t', file);
8846 return;
8847
8848 case 8:
8849 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8850 {
8851 #ifdef GAS_MNEMONICS
8852 putc ('q', file);
8853 #else
8854 putc ('l', file);
8855 putc ('l', file);
8856 #endif
8857 }
8858 else
8859 putc ('l', file);
8860 return;
8861
8862 default:
8863 gcc_unreachable ();
8864 }
8865
8866 case 'b':
8867 case 'w':
8868 case 'k':
8869 case 'q':
8870 case 'h':
8871 case 'y':
8872 case 'X':
8873 case 'P':
8874 break;
8875
8876 case 's':
8877 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8878 {
8879 PRINT_OPERAND (file, x, 0);
8880 putc (',', file);
8881 }
8882 return;
8883
8884 case 'D':
8885 /* A little bit of braindamage here.  The SSE compare instructions
8886 use completely different names for the comparisons than the
8887 fp conditional moves do. */
8888 switch (GET_CODE (x))
8889 {
8890 case EQ:
8891 case UNEQ:
8892 fputs ("eq", file);
8893 break;
8894 case LT:
8895 case UNLT:
8896 fputs ("lt", file);
8897 break;
8898 case LE:
8899 case UNLE:
8900 fputs ("le", file);
8901 break;
8902 case UNORDERED:
8903 fputs ("unord", file);
8904 break;
8905 case NE:
8906 case LTGT:
8907 fputs ("neq", file);
8908 break;
8909 case UNGE:
8910 case GE:
8911 fputs ("nlt", file);
8912 break;
8913 case UNGT:
8914 case GT:
8915 fputs ("nle", file);
8916 break;
8917 case ORDERED:
8918 fputs ("ord", file);
8919 break;
8920 default:
8921 gcc_unreachable ();
8922 }
8923 return;
8924 case 'O':
8925 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8926 if (ASSEMBLER_DIALECT == ASM_ATT)
8927 {
8928 switch (GET_MODE (x))
8929 {
8930 case HImode: putc ('w', file); break;
8931 case SImode:
8932 case SFmode: putc ('l', file); break;
8933 case DImode:
8934 case DFmode: putc ('q', file); break;
8935 default: gcc_unreachable ();
8936 }
8937 putc ('.', file);
8938 }
8939 #endif
8940 return;
8941 case 'C':
8942 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8943 return;
8944 case 'F':
8945 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8946 if (ASSEMBLER_DIALECT == ASM_ATT)
8947 putc ('.', file);
8948 #endif
8949 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8950 return;
8951
8952 /* Like above, but reverse condition */
8953 case 'c':
8954 /* Check to see if argument to %c is really a constant
8955 and not a condition code which needs to be reversed. */
8956 if (!COMPARISON_P (x))
8957 {
8958 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8959 return;
8960 }
8961 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8962 return;
8963 case 'f':
8964 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8965 if (ASSEMBLER_DIALECT == ASM_ATT)
8966 putc ('.', file);
8967 #endif
8968 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8969 return;
8970
8971 case 'H':
8972 /* It doesn't actually matter what mode we use here, as we're
8973 only going to use this for printing. */
8974 x = adjust_address_nv (x, DImode, 8);
8975 break;
8976
8977 case '+':
8978 {
8979 rtx x;
8980
8981 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8982 return;
8983
8984 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8985 if (x)
8986 {
8987 int pred_val = INTVAL (XEXP (x, 0));
8988
8989 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8990 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8991 {
8992 int taken = pred_val > REG_BR_PROB_BASE / 2;
8993 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8994
8995 /* Emit hints only when the default branch prediction
8996 heuristics would fail. */
8997 if (taken != cputaken)
8998 {
8999 /* We use 3e (DS) prefix for taken branches and
9000 2e (CS) prefix for not taken branches. */
9001 if (taken)
9002 fputs ("ds ; ", file);
9003 else
9004 fputs ("cs ; ", file);
9005 }
9006 }
9007 }
9008 return;
9009 }
9010
9011 case 'Y':
9012 switch (GET_CODE (x))
9013 {
9014 case NE:
9015 fputs ("neq", file);
9016 break;
9017 case EQ:
9018 fputs ("eq", file);
9019 break;
9020 case GE:
9021 case GEU:
9022 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9023 break;
9024 case GT:
9025 case GTU:
9026 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9027 break;
9028 case LE:
9029 case LEU:
9030 fputs ("le", file);
9031 break;
9032 case LT:
9033 case LTU:
9034 fputs ("lt", file);
9035 break;
9036 case UNORDERED:
9037 fputs ("unord", file);
9038 break;
9039 case ORDERED:
9040 fputs ("ord", file);
9041 break;
9042 case UNEQ:
9043 fputs ("ueq", file);
9044 break;
9045 case UNGE:
9046 fputs ("nlt", file);
9047 break;
9048 case UNGT:
9049 fputs ("nle", file);
9050 break;
9051 case UNLE:
9052 fputs ("ule", file);
9053 break;
9054 case UNLT:
9055 fputs ("ult", file);
9056 break;
9057 case LTGT:
9058 fputs ("une", file);
9059 break;
9060 default:
9061 gcc_unreachable ();
9062 }
9063 return;
9064
9065 case ';':
9066 #if TARGET_MACHO
9067 fputs (" ; ", file);
9068 #else
9069 fputc (' ', file);
9070 #endif
9071 return;
9072
9073 default:
9074 output_operand_lossage ("invalid operand code '%c'", code);
9075 }
9076 }
9077
9078 if (REG_P (x))
9079 print_reg (x, code, file);
9080
9081 else if (MEM_P (x))
9082 {
9083 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9084 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9085 && GET_MODE (x) != BLKmode)
9086 {
9087 const char * size;
9088 switch (GET_MODE_SIZE (GET_MODE (x)))
9089 {
9090 case 1: size = "BYTE"; break;
9091 case 2: size = "WORD"; break;
9092 case 4: size = "DWORD"; break;
9093 case 8: size = "QWORD"; break;
9094 case 12: size = "XWORD"; break;
9095 case 16:
9096 if (GET_MODE (x) == XFmode)
9097 size = "XWORD";
9098 else
9099 size = "XMMWORD";
9100 break;
9101 default:
9102 gcc_unreachable ();
9103 }
9104
9105 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9106 if (code == 'b')
9107 size = "BYTE";
9108 else if (code == 'w')
9109 size = "WORD";
9110 else if (code == 'k')
9111 size = "DWORD";
9112
9113 fputs (size, file);
9114 fputs (" PTR ", file);
9115 }
9116
9117 x = XEXP (x, 0);
9118 /* Avoid (%rip) for call operands. */
9119 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9120 && !CONST_INT_P (x))
9121 output_addr_const (file, x);
9122 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9123 output_operand_lossage ("invalid constraints for operand");
9124 else
9125 output_address (x);
9126 }
9127
9128 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9129 {
9130 REAL_VALUE_TYPE r;
9131 long l;
9132
9133 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9134 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9135
9136 if (ASSEMBLER_DIALECT == ASM_ATT)
9137 putc ('$', file);
9138 fprintf (file, "0x%08lx", l);
9139 }
9140
9141 /* These float cases don't actually occur as immediate operands. */
9142 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9143 {
9144 char dstr[30];
9145
9146 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9147 fprintf (file, "%s", dstr);
9148 }
9149
9150 else if (GET_CODE (x) == CONST_DOUBLE
9151 && GET_MODE (x) == XFmode)
9152 {
9153 char dstr[30];
9154
9155 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9156 fprintf (file, "%s", dstr);
9157 }
9158
9159 else
9160 {
9161 /* We have patterns that allow zero sets of memory, for instance.
9162 In 64-bit mode, we should probably support all 8-byte vectors,
9163 since we can in fact encode that into an immediate. */
9164 if (GET_CODE (x) == CONST_VECTOR)
9165 {
9166 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9167 x = const0_rtx;
9168 }
9169
9170 if (code != 'P')
9171 {
9172 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9173 {
9174 if (ASSEMBLER_DIALECT == ASM_ATT)
9175 putc ('$', file);
9176 }
9177 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9178 || GET_CODE (x) == LABEL_REF)
9179 {
9180 if (ASSEMBLER_DIALECT == ASM_ATT)
9181 putc ('$', file);
9182 else
9183 fputs ("OFFSET FLAT:", file);
9184 }
9185 }
9186 if (CONST_INT_P (x))
9187 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9188 else if (flag_pic)
9189 output_pic_addr_const (file, x, code);
9190 else
9191 output_addr_const (file, x);
9192 }
9193 }
9194 \f
9195 /* Print a memory operand whose address is ADDR. */
9196
9197 void
9198 print_operand_address (FILE *file, rtx addr)
9199 {
9200 struct ix86_address parts;
9201 rtx base, index, disp;
9202 int scale;
9203 int ok = ix86_decompose_address (addr, &parts);
9204
9205 gcc_assert (ok);
9206
9207 base = parts.base;
9208 index = parts.index;
9209 disp = parts.disp;
9210 scale = parts.scale;
9211
9212 switch (parts.seg)
9213 {
9214 case SEG_DEFAULT:
9215 break;
9216 case SEG_FS:
9217 case SEG_GS:
9218 if (ASSEMBLER_DIALECT == ASM_ATT)
9219 putc ('%', file);
9220 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9221 break;
9222 default:
9223 gcc_unreachable ();
9224 }
9225
9226 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
9227 if (TARGET_64BIT && !base && !index)
9228 {
9229 rtx symbol = disp;
9230
9231 if (GET_CODE (disp) == CONST
9232 && GET_CODE (XEXP (disp, 0)) == PLUS
9233 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9234 symbol = XEXP (XEXP (disp, 0), 0);
9235
9236 if (GET_CODE (symbol) == LABEL_REF
9237 || (GET_CODE (symbol) == SYMBOL_REF
9238 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9239 base = pc_rtx;
9240 }
9241 if (!base && !index)
9242 {
9243 /* A displacement-only address requires special attention. */
9244
9245 if (CONST_INT_P (disp))
9246 {
9247 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9248 fputs ("ds:", file);
9249 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9250 }
9251 else if (flag_pic)
9252 output_pic_addr_const (file, disp, 0);
9253 else
9254 output_addr_const (file, disp);
9255 }
9256 else
9257 {
9258 if (ASSEMBLER_DIALECT == ASM_ATT)
9259 {
9260 if (disp)
9261 {
9262 if (flag_pic)
9263 output_pic_addr_const (file, disp, 0);
9264 else if (GET_CODE (disp) == LABEL_REF)
9265 output_asm_label (disp);
9266 else
9267 output_addr_const (file, disp);
9268 }
9269
9270 putc ('(', file);
9271 if (base)
9272 print_reg (base, 0, file);
9273 if (index)
9274 {
9275 putc (',', file);
9276 print_reg (index, 0, file);
9277 if (scale != 1)
9278 fprintf (file, ",%d", scale);
9279 }
9280 putc (')', file);
9281 }
9282 else
9283 {
9284 rtx offset = NULL_RTX;
9285
9286 if (disp)
9287 {
9288 /* Pull out the offset of a symbol; print any symbol itself. */
9289 if (GET_CODE (disp) == CONST
9290 && GET_CODE (XEXP (disp, 0)) == PLUS
9291 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9292 {
9293 offset = XEXP (XEXP (disp, 0), 1);
9294 disp = gen_rtx_CONST (VOIDmode,
9295 XEXP (XEXP (disp, 0), 0));
9296 }
9297
9298 if (flag_pic)
9299 output_pic_addr_const (file, disp, 0);
9300 else if (GET_CODE (disp) == LABEL_REF)
9301 output_asm_label (disp);
9302 else if (CONST_INT_P (disp))
9303 offset = disp;
9304 else
9305 output_addr_const (file, disp);
9306 }
9307
9308 putc ('[', file);
9309 if (base)
9310 {
9311 print_reg (base, 0, file);
9312 if (offset)
9313 {
9314 if (INTVAL (offset) >= 0)
9315 putc ('+', file);
9316 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9317 }
9318 }
9319 else if (offset)
9320 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9321 else
9322 putc ('0', file);
9323
9324 if (index)
9325 {
9326 putc ('+', file);
9327 print_reg (index, 0, file);
9328 if (scale != 1)
9329 fprintf (file, "*%d", scale);
9330 }
9331 putc (']', file);
9332 }
9333 }
9334 }
9335
9336 bool
9337 output_addr_const_extra (FILE *file, rtx x)
9338 {
9339 rtx op;
9340
9341 if (GET_CODE (x) != UNSPEC)
9342 return false;
9343
9344 op = XVECEXP (x, 0, 0);
9345 switch (XINT (x, 1))
9346 {
9347 case UNSPEC_GOTTPOFF:
9348 output_addr_const (file, op);
9349 /* FIXME: This might be @TPOFF in Sun ld. */
9350 fputs ("@GOTTPOFF", file);
9351 break;
9352 case UNSPEC_TPOFF:
9353 output_addr_const (file, op);
9354 fputs ("@TPOFF", file);
9355 break;
9356 case UNSPEC_NTPOFF:
9357 output_addr_const (file, op);
9358 if (TARGET_64BIT)
9359 fputs ("@TPOFF", file);
9360 else
9361 fputs ("@NTPOFF", file);
9362 break;
9363 case UNSPEC_DTPOFF:
9364 output_addr_const (file, op);
9365 fputs ("@DTPOFF", file);
9366 break;
9367 case UNSPEC_GOTNTPOFF:
9368 output_addr_const (file, op);
9369 if (TARGET_64BIT)
9370 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9371 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9372 else
9373 fputs ("@GOTNTPOFF", file);
9374 break;
9375 case UNSPEC_INDNTPOFF:
9376 output_addr_const (file, op);
9377 fputs ("@INDNTPOFF", file);
9378 break;
9379
9380 default:
9381 return false;
9382 }
9383
9384 return true;
9385 }
9386 \f
9387 /* Split one or more DImode RTL references into pairs of SImode
9388 references. The RTL can be REG, offsettable MEM, integer constant, or
9389 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9390 split and "num" is its length. lo_half and hi_half are output arrays
9391 that parallel "operands". */
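/* Callers are typically post-reload splitters: they pass the insn's
   DImode operands here and then emit separate SImode instructions on
   the lo_half[] entries followed by the hi_half[] entries. */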
9392
9393 void
9394 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9395 {
9396 while (num--)
9397 {
9398 rtx op = operands[num];
9399
9400 /* simplify_subreg refuses to split volatile memory addresses,
9401 but we still have to handle them. */
9402 if (MEM_P (op))
9403 {
9404 lo_half[num] = adjust_address (op, SImode, 0);
9405 hi_half[num] = adjust_address (op, SImode, 4);
9406 }
9407 else
9408 {
9409 lo_half[num] = simplify_gen_subreg (SImode, op,
9410 GET_MODE (op) == VOIDmode
9411 ? DImode : GET_MODE (op), 0);
9412 hi_half[num] = simplify_gen_subreg (SImode, op,
9413 GET_MODE (op) == VOIDmode
9414 ? DImode : GET_MODE (op), 4);
9415 }
9416 }
9417 }
9418 /* Split one or more TImode RTL references into pairs of DImode
9419 references. The RTL can be REG, offsettable MEM, integer constant, or
9420 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9421 split and "num" is its length. lo_half and hi_half are output arrays
9422 that parallel "operands". */
9423
9424 void
9425 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9426 {
9427 while (num--)
9428 {
9429 rtx op = operands[num];
9430
9431 /* simplify_subreg refuses to split volatile memory addresses, but we
9432 still have to handle them. */
9433 if (MEM_P (op))
9434 {
9435 lo_half[num] = adjust_address (op, DImode, 0);
9436 hi_half[num] = adjust_address (op, DImode, 8);
9437 }
9438 else
9439 {
9440 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9441 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9442 }
9443 }
9444 }
9445 \f
9446 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9447 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9448 is the expression of the binary operation. The output may either be
9449 emitted here, or returned to the caller, like all output_* functions.
9450
9451 There is no guarantee that the operands are the same mode, as they
9452 might be within FLOAT or FLOAT_EXTEND expressions. */
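/* For an SSE DFmode add, for instance, the template returned below is
   "addsd\t{%2, %0|%0, %2}"; the x87 cases instead append plain, popping
   or reversed forms of the fadd/fsub/fmul/fdiv mnemonics, depending on
   which operand is st(0) and which operands die. */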
9453
9454 #ifndef SYSV386_COMPAT
9455 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9456 wants to fix the assemblers because that causes incompatibility
9457 with gcc. No-one wants to fix gcc because that causes
9458 incompatibility with assemblers... You can use the option of
9459 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9460 #define SYSV386_COMPAT 1
9461 #endif
9462
9463 const char *
9464 output_387_binary_op (rtx insn, rtx *operands)
9465 {
9466 static char buf[30];
9467 const char *p;
9468 const char *ssep;
9469 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9470
9471 #ifdef ENABLE_CHECKING
9472 /* Even if we do not want to check the inputs, this documents the input
9473 constraints, which helps in understanding the following code. */
9474 if (STACK_REG_P (operands[0])
9475 && ((REG_P (operands[1])
9476 && REGNO (operands[0]) == REGNO (operands[1])
9477 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9478 || (REG_P (operands[2])
9479 && REGNO (operands[0]) == REGNO (operands[2])
9480 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9481 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9482 ; /* ok */
9483 else
9484 gcc_assert (is_sse);
9485 #endif
9486
9487 switch (GET_CODE (operands[3]))
9488 {
9489 case PLUS:
9490 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9491 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9492 p = "fiadd";
9493 else
9494 p = "fadd";
9495 ssep = "add";
9496 break;
9497
9498 case MINUS:
9499 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9500 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9501 p = "fisub";
9502 else
9503 p = "fsub";
9504 ssep = "sub";
9505 break;
9506
9507 case MULT:
9508 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9509 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9510 p = "fimul";
9511 else
9512 p = "fmul";
9513 ssep = "mul";
9514 break;
9515
9516 case DIV:
9517 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9518 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9519 p = "fidiv";
9520 else
9521 p = "fdiv";
9522 ssep = "div";
9523 break;
9524
9525 default:
9526 gcc_unreachable ();
9527 }
9528
9529 if (is_sse)
9530 {
9531 strcpy (buf, ssep);
9532 if (GET_MODE (operands[0]) == SFmode)
9533 strcat (buf, "ss\t{%2, %0|%0, %2}");
9534 else
9535 strcat (buf, "sd\t{%2, %0|%0, %2}");
9536 return buf;
9537 }
9538 strcpy (buf, p);
9539
9540 switch (GET_CODE (operands[3]))
9541 {
9542 case MULT:
9543 case PLUS:
9544 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9545 {
9546 rtx temp = operands[2];
9547 operands[2] = operands[1];
9548 operands[1] = temp;
9549 }
9550
9551 /* We now know operands[0] == operands[1]. */
9552
9553 if (MEM_P (operands[2]))
9554 {
9555 p = "%z2\t%2";
9556 break;
9557 }
9558
9559 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9560 {
9561 if (STACK_TOP_P (operands[0]))
9562 /* How is it that we are storing to a dead operand[2]?
9563 Well, presumably operands[1] is dead too. We can't
9564 store the result to st(0) as st(0) gets popped on this
9565 instruction. Instead store to operands[2] (which I
9566 think has to be st(1)). st(1) will be popped later.
9567 gcc <= 2.8.1 didn't have this check and generated
9568 assembly code that the Unixware assembler rejected. */
9569 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9570 else
9571 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9572 break;
9573 }
9574
9575 if (STACK_TOP_P (operands[0]))
9576 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9577 else
9578 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9579 break;
9580
9581 case MINUS:
9582 case DIV:
9583 if (MEM_P (operands[1]))
9584 {
9585 p = "r%z1\t%1";
9586 break;
9587 }
9588
9589 if (MEM_P (operands[2]))
9590 {
9591 p = "%z2\t%2";
9592 break;
9593 }
9594
9595 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9596 {
9597 #if SYSV386_COMPAT
9598 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9599 derived assemblers, confusingly reverse the direction of
9600 the operation for fsub{r} and fdiv{r} when the
9601 destination register is not st(0). The Intel assembler
9602 doesn't have this brain damage. Read !SYSV386_COMPAT to
9603 figure out what the hardware really does. */
9604 if (STACK_TOP_P (operands[0]))
9605 p = "{p\t%0, %2|rp\t%2, %0}";
9606 else
9607 p = "{rp\t%2, %0|p\t%0, %2}";
9608 #else
9609 if (STACK_TOP_P (operands[0]))
9610 /* As above for fmul/fadd, we can't store to st(0). */
9611 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9612 else
9613 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9614 #endif
9615 break;
9616 }
9617
9618 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9619 {
9620 #if SYSV386_COMPAT
9621 if (STACK_TOP_P (operands[0]))
9622 p = "{rp\t%0, %1|p\t%1, %0}";
9623 else
9624 p = "{p\t%1, %0|rp\t%0, %1}";
9625 #else
9626 if (STACK_TOP_P (operands[0]))
9627 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9628 else
9629 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9630 #endif
9631 break;
9632 }
9633
9634 if (STACK_TOP_P (operands[0]))
9635 {
9636 if (STACK_TOP_P (operands[1]))
9637 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9638 else
9639 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9640 break;
9641 }
9642 else if (STACK_TOP_P (operands[1]))
9643 {
9644 #if SYSV386_COMPAT
9645 p = "{\t%1, %0|r\t%0, %1}";
9646 #else
9647 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9648 #endif
9649 }
9650 else
9651 {
9652 #if SYSV386_COMPAT
9653 p = "{r\t%2, %0|\t%0, %2}";
9654 #else
9655 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9656 #endif
9657 }
9658 break;
9659
9660 default:
9661 gcc_unreachable ();
9662 }
9663
9664 strcat (buf, p);
9665 return buf;
9666 }
9667
9668 /* Return needed mode for entity in optimize_mode_switching pass. */
9669
9670 int
9671 ix86_mode_needed (int entity, rtx insn)
9672 {
9673 enum attr_i387_cw mode;
9674
9675 /* The mode UNINITIALIZED is used to store the control word after a
9676 function call or ASM pattern. The mode ANY specifies that the insn
9677 has no requirements on the control word and makes no changes in the
9678 bits we are interested in. */
9679
9680 if (CALL_P (insn)
9681 || (NONJUMP_INSN_P (insn)
9682 && (asm_noperands (PATTERN (insn)) >= 0
9683 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9684 return I387_CW_UNINITIALIZED;
9685
9686 if (recog_memoized (insn) < 0)
9687 return I387_CW_ANY;
9688
9689 mode = get_attr_i387_cw (insn);
9690
9691 switch (entity)
9692 {
9693 case I387_TRUNC:
9694 if (mode == I387_CW_TRUNC)
9695 return mode;
9696 break;
9697
9698 case I387_FLOOR:
9699 if (mode == I387_CW_FLOOR)
9700 return mode;
9701 break;
9702
9703 case I387_CEIL:
9704 if (mode == I387_CW_CEIL)
9705 return mode;
9706 break;
9707
9708 case I387_MASK_PM:
9709 if (mode == I387_CW_MASK_PM)
9710 return mode;
9711 break;
9712
9713 default:
9714 gcc_unreachable ();
9715 }
9716
9717 return I387_CW_ANY;
9718 }
9719
9720 /* Output code to initialize the control word copies used by the trunc?f?i
9721 and rounding patterns. MODE selects which rounding or masking variant
9722 of the control word to set up in its stack slot. */
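/* The magic constants below are fields of the x87 control word: bits 11:10
   are the rounding control (00 nearest, 01 down, 10 up, 11 toward zero),
   and bit 5 (0x0020) masks the precision exception. */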
9723
9724 void
9725 emit_i387_cw_initialization (int mode)
9726 {
9727 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9728 rtx new_mode;
9729
9730 enum ix86_stack_slot slot;
9731
9732 rtx reg = gen_reg_rtx (HImode);
9733
9734 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9735 emit_move_insn (reg, copy_rtx (stored_mode));
9736
9737 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9738 {
9739 switch (mode)
9740 {
9741 case I387_CW_TRUNC:
9742 /* round toward zero (truncate) */
9743 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9744 slot = SLOT_CW_TRUNC;
9745 break;
9746
9747 case I387_CW_FLOOR:
9748 /* round down toward -oo */
9749 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9750 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9751 slot = SLOT_CW_FLOOR;
9752 break;
9753
9754 case I387_CW_CEIL:
9755 /* round up toward +oo */
9756 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9757 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9758 slot = SLOT_CW_CEIL;
9759 break;
9760
9761 case I387_CW_MASK_PM:
9762 /* mask precision exception for nearbyint() */
9763 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9764 slot = SLOT_CW_MASK_PM;
9765 break;
9766
9767 default:
9768 gcc_unreachable ();
9769 }
9770 }
9771 else
9772 {
9773 switch (mode)
9774 {
9775 case I387_CW_TRUNC:
9776 /* round toward zero (truncate) */
9777 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9778 slot = SLOT_CW_TRUNC;
9779 break;
9780
9781 case I387_CW_FLOOR:
9782 /* round down toward -oo */
9783 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9784 slot = SLOT_CW_FLOOR;
9785 break;
9786
9787 case I387_CW_CEIL:
9788 /* round up toward +oo */
9789 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9790 slot = SLOT_CW_CEIL;
9791 break;
9792
9793 case I387_CW_MASK_PM:
9794 /* mask precision exception for nearbyint() */
9795 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9796 slot = SLOT_CW_MASK_PM;
9797 break;
9798
9799 default:
9800 gcc_unreachable ();
9801 }
9802 }
9803
9804 gcc_assert (slot < MAX_386_STACK_LOCALS);
9805
9806 new_mode = assign_386_stack_local (HImode, slot);
9807 emit_move_insn (new_mode, reg);
9808 }
9809
9810 /* Output code for INSN to convert a float to a signed int. OPERANDS
9811 are the insn operands. The output may be [HSD]Imode and the input
9812 operand may be [SDX]Fmode. */
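/* When a specific rounding mode is required, the non-fisttp path below
   loads the pre-computed control word (see emit_i387_cw_initialization
   above) with "fldcw\t%3" and restores the saved word with "fldcw\t%2"
   once the store is done. */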
9813
9814 const char *
9815 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9816 {
9817 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9818 int dimode_p = GET_MODE (operands[0]) == DImode;
9819 int round_mode = get_attr_i387_cw (insn);
9820
9821 /* Jump through a hoop or two for DImode, since the hardware has no
9822 non-popping instruction. We used to do this a different way, but
9823 that was somewhat fragile and broke with post-reload splitters. */
9824 if ((dimode_p || fisttp) && !stack_top_dies)
9825 output_asm_insn ("fld\t%y1", operands);
9826
9827 gcc_assert (STACK_TOP_P (operands[1]));
9828 gcc_assert (MEM_P (operands[0]));
9829 gcc_assert (GET_MODE (operands[1]) != TFmode);
9830
9831 if (fisttp)
9832 output_asm_insn ("fisttp%z0\t%0", operands);
9833 else
9834 {
9835 if (round_mode != I387_CW_ANY)
9836 output_asm_insn ("fldcw\t%3", operands);
9837 if (stack_top_dies || dimode_p)
9838 output_asm_insn ("fistp%z0\t%0", operands);
9839 else
9840 output_asm_insn ("fist%z0\t%0", operands);
9841 if (round_mode != I387_CW_ANY)
9842 output_asm_insn ("fldcw\t%2", operands);
9843 }
9844
9845 return "";
9846 }
9847
9848 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9849 have the values zero or one, indicates the ffreep insn's operand
9850 from the OPERANDS array. */
9851
9852 static const char *
9853 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9854 {
9855 if (TARGET_USE_FFREEP)
9856 #if HAVE_AS_IX86_FFREEP
9857 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9858 #else
9859 {
9860 static char retval[] = ".word\t0xc_df";
9861 int regno = REGNO (operands[opno]);
9862
9863 gcc_assert (FP_REGNO_P (regno));
9864
9865 retval[9] = '0' + (regno - FIRST_STACK_REG);
9866 return retval;
9867 }
9868 #endif
9869
9870 return opno ? "fstp\t%y1" : "fstp\t%y0";
9871 }
9872
9873
9874 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9875 should be used. UNORDERED_P is true when fucom should be used. */
9876
9877 const char *
9878 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9879 {
9880 int stack_top_dies;
9881 rtx cmp_op0, cmp_op1;
9882 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9883
9884 if (eflags_p)
9885 {
9886 cmp_op0 = operands[0];
9887 cmp_op1 = operands[1];
9888 }
9889 else
9890 {
9891 cmp_op0 = operands[1];
9892 cmp_op1 = operands[2];
9893 }
9894
9895 if (is_sse)
9896 {
9897 if (GET_MODE (operands[0]) == SFmode)
9898 if (unordered_p)
9899 return "ucomiss\t{%1, %0|%0, %1}";
9900 else
9901 return "comiss\t{%1, %0|%0, %1}";
9902 else
9903 if (unordered_p)
9904 return "ucomisd\t{%1, %0|%0, %1}";
9905 else
9906 return "comisd\t{%1, %0|%0, %1}";
9907 }
9908
9909 gcc_assert (STACK_TOP_P (cmp_op0));
9910
9911 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9912
9913 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9914 {
9915 if (stack_top_dies)
9916 {
9917 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9918 return output_387_ffreep (operands, 1);
9919 }
9920 else
9921 return "ftst\n\tfnstsw\t%0";
9922 }
9923
9924 if (STACK_REG_P (cmp_op1)
9925 && stack_top_dies
9926 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9927 && REGNO (cmp_op1) != FIRST_STACK_REG)
9928 {
9929 /* If the top of the 387 stack dies, and the other operand
9930 is also a stack register that dies, then this must be a
9931 `fcompp' float compare. */
9932
9933 if (eflags_p)
9934 {
9935 /* There is no double popping fcomi variant. Fortunately,
9936 eflags is immune from the fstp's cc clobbering. */
9937 if (unordered_p)
9938 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9939 else
9940 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9941 return output_387_ffreep (operands, 0);
9942 }
9943 else
9944 {
9945 if (unordered_p)
9946 return "fucompp\n\tfnstsw\t%0";
9947 else
9948 return "fcompp\n\tfnstsw\t%0";
9949 }
9950 }
9951 else
9952 {
9953 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9954
9955 static const char * const alt[16] =
9956 {
9957 "fcom%z2\t%y2\n\tfnstsw\t%0",
9958 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9959 "fucom%z2\t%y2\n\tfnstsw\t%0",
9960 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9961
9962 "ficom%z2\t%y2\n\tfnstsw\t%0",
9963 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9964 NULL,
9965 NULL,
9966
9967 "fcomi\t{%y1, %0|%0, %y1}",
9968 "fcomip\t{%y1, %0|%0, %y1}",
9969 "fucomi\t{%y1, %0|%0, %y1}",
9970 "fucomip\t{%y1, %0|%0, %y1}",
9971
9972 NULL,
9973 NULL,
9974 NULL,
9975 NULL
9976 };
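/* For example, mask == 9 (eflags_p and stack_top_dies set) selects
   "fcomip\t{%y1, %0|%0, %y1}", which also pops the stack top. */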
9977
9978 int mask;
9979 const char *ret;
9980
9981 mask = eflags_p << 3;
9982 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9983 mask |= unordered_p << 1;
9984 mask |= stack_top_dies;
9985
9986 gcc_assert (mask < 16);
9987 ret = alt[mask];
9988 gcc_assert (ret);
9989
9990 return ret;
9991 }
9992 }
9993
9994 void
9995 ix86_output_addr_vec_elt (FILE *file, int value)
9996 {
9997 const char *directive = ASM_LONG;
9998
9999 #ifdef ASM_QUAD
10000 if (TARGET_64BIT)
10001 directive = ASM_QUAD;
10002 #else
10003 gcc_assert (!TARGET_64BIT);
10004 #endif
10005
10006 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10007 }
10008
10009 void
10010 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10011 {
10012 const char *directive = ASM_LONG;
10013
10014 #ifdef ASM_QUAD
10015 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10016 directive = ASM_QUAD;
10017 #else
10018 gcc_assert (!TARGET_64BIT);
10019 #endif
10020 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10021 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10022 fprintf (file, "%s%s%d-%s%d\n",
10023 directive, LPREFIX, value, LPREFIX, rel);
10024 else if (HAVE_AS_GOTOFF_IN_DATA)
10025 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10026 #if TARGET_MACHO
10027 else if (TARGET_MACHO)
10028 {
10029 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10030 machopic_output_function_base_name (file);
10031 fprintf(file, "\n");
10032 }
10033 #endif
10034 else
10035 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10036 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10037 }
10038 \f
10039 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10040 for the target. */
10041
10042 void
10043 ix86_expand_clear (rtx dest)
10044 {
10045 rtx tmp;
10046
10047 /* We play register width games, which are only valid after reload. */
10048 gcc_assert (reload_completed);
10049
10050 /* Avoid HImode and its attendant prefix byte. */
10051 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10052 dest = gen_rtx_REG (SImode, REGNO (dest));
10053 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10054
10055 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10056 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10057 {
10058 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10059 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10060 }
10061
10062 emit_insn (tmp);
10063 }
10064
10065 /* X is an unchanging MEM. If it is a constant pool reference, return
10066 the constant pool rtx, else NULL. */
10067
10068 rtx
10069 maybe_get_pool_constant (rtx x)
10070 {
10071 x = ix86_delegitimize_address (XEXP (x, 0));
10072
10073 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10074 return get_pool_constant (x);
10075
10076 return NULL_RTX;
10077 }
10078
10079 void
10080 ix86_expand_move (enum machine_mode mode, rtx operands[])
10081 {
10082 rtx op0, op1;
10083 enum tls_model model;
10084
10085 op0 = operands[0];
10086 op1 = operands[1];
10087
10088 if (GET_CODE (op1) == SYMBOL_REF)
10089 {
10090 model = SYMBOL_REF_TLS_MODEL (op1);
10091 if (model)
10092 {
10093 op1 = legitimize_tls_address (op1, model, true);
10094 op1 = force_operand (op1, op0);
10095 if (op1 == op0)
10096 return;
10097 }
10098 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10099 && SYMBOL_REF_DLLIMPORT_P (op1))
10100 op1 = legitimize_dllimport_symbol (op1, false);
10101 }
10102 else if (GET_CODE (op1) == CONST
10103 && GET_CODE (XEXP (op1, 0)) == PLUS
10104 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10105 {
10106 rtx addend = XEXP (XEXP (op1, 0), 1);
10107 rtx symbol = XEXP (XEXP (op1, 0), 0);
10108 rtx tmp = NULL;
10109
10110 model = SYMBOL_REF_TLS_MODEL (symbol);
10111 if (model)
10112 tmp = legitimize_tls_address (symbol, model, true);
10113 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10114 && SYMBOL_REF_DLLIMPORT_P (symbol))
10115 tmp = legitimize_dllimport_symbol (symbol, true);
10116
10117 if (tmp)
10118 {
10119 tmp = force_operand (tmp, NULL);
10120 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10121 op0, 1, OPTAB_DIRECT);
10122 if (tmp == op0)
10123 return;
10124 }
10125 }
10126
10127 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10128 {
10129 if (TARGET_MACHO && !TARGET_64BIT)
10130 {
10131 #if TARGET_MACHO
10132 if (MACHOPIC_PURE)
10133 {
10134 rtx temp = ((reload_in_progress
10135 || ((op0 && REG_P (op0))
10136 && mode == Pmode))
10137 ? op0 : gen_reg_rtx (Pmode));
10138 op1 = machopic_indirect_data_reference (op1, temp);
10139 op1 = machopic_legitimize_pic_address (op1, mode,
10140 temp == op1 ? 0 : temp);
10141 }
10142 else if (MACHOPIC_INDIRECT)
10143 op1 = machopic_indirect_data_reference (op1, 0);
10144 if (op0 == op1)
10145 return;
10146 #endif
10147 }
10148 else
10149 {
10150 if (MEM_P (op0))
10151 op1 = force_reg (Pmode, op1);
10152 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10153 {
10154 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10155 op1 = legitimize_pic_address (op1, reg);
10156 if (op0 == op1)
10157 return;
10158 }
10159 }
10160 }
10161 else
10162 {
10163 if (MEM_P (op0)
10164 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10165 || !push_operand (op0, mode))
10166 && MEM_P (op1))
10167 op1 = force_reg (mode, op1);
10168
10169 if (push_operand (op0, mode)
10170 && ! general_no_elim_operand (op1, mode))
10171 op1 = copy_to_mode_reg (mode, op1);
10172
10173 /* Force large constants in 64-bit compilation into a register
10174 to get them CSEd. */
10175 if (can_create_pseudo_p ()
10176 && (mode == DImode) && TARGET_64BIT
10177 && immediate_operand (op1, mode)
10178 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10179 && !register_operand (op0, mode)
10180 && optimize)
10181 op1 = copy_to_mode_reg (mode, op1);
10182
10183 if (can_create_pseudo_p ()
10184 && FLOAT_MODE_P (mode)
10185 && GET_CODE (op1) == CONST_DOUBLE)
10186 {
10187 /* If we are loading a floating point constant to a register,
10188 force the value to memory now, since we'll get better code
10189 out of the back end. */
10190
10191 op1 = validize_mem (force_const_mem (mode, op1));
10192 if (!register_operand (op0, mode))
10193 {
10194 rtx temp = gen_reg_rtx (mode);
10195 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10196 emit_move_insn (op0, temp);
10197 return;
10198 }
10199 }
10200 }
10201
10202 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10203 }
10204
10205 void
10206 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10207 {
10208 rtx op0 = operands[0], op1 = operands[1];
10209 unsigned int align = GET_MODE_ALIGNMENT (mode);
10210
10211 /* Force constants other than zero into memory. We do not know how
10212 the instructions used to build constants modify the upper 64 bits
10213 of the register; once we have that information we may be able
10214 to handle some of them more efficiently. */
10215 if (can_create_pseudo_p ()
10216 && register_operand (op0, mode)
10217 && (CONSTANT_P (op1)
10218 || (GET_CODE (op1) == SUBREG
10219 && CONSTANT_P (SUBREG_REG (op1))))
10220 && standard_sse_constant_p (op1) <= 0)
10221 op1 = validize_mem (force_const_mem (mode, op1));
10222
10223 /* TDmode values are passed as TImode on the stack. TImode values
10224 are moved via xmm registers, and moving them to the stack can result
10225 in unaligned memory accesses. Use ix86_expand_vector_move_misalign()
10226 if the memory operand is not aligned correctly. */
10227 if (can_create_pseudo_p ()
10228 && (mode == TImode) && !TARGET_64BIT
10229 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10230 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10231 {
10232 rtx tmp[2];
10233
10234 /* ix86_expand_vector_move_misalign() does not like constants ... */
10235 if (CONSTANT_P (op1)
10236 || (GET_CODE (op1) == SUBREG
10237 && CONSTANT_P (SUBREG_REG (op1))))
10238 op1 = validize_mem (force_const_mem (mode, op1));
10239
10240 /* ... nor both arguments in memory. */
10241 if (!register_operand (op0, mode)
10242 && !register_operand (op1, mode))
10243 op1 = force_reg (mode, op1);
10244
10245 tmp[0] = op0; tmp[1] = op1;
10246 ix86_expand_vector_move_misalign (mode, tmp);
10247 return;
10248 }
10249
10250 /* Make operand1 a register if it isn't already. */
10251 if (can_create_pseudo_p ()
10252 && !register_operand (op0, mode)
10253 && !register_operand (op1, mode))
10254 {
10255 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10256 return;
10257 }
10258
10259 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10260 }
10261
10262 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10263 straight to ix86_expand_vector_move. */
10264 /* Code generation for scalar reg-reg moves of single and double precision data:
10265 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10266 movaps reg, reg
10267 else
10268 movss reg, reg
10269 if (x86_sse_partial_reg_dependency == true)
10270 movapd reg, reg
10271 else
10272 movsd reg, reg
10273
10274 Code generation for scalar loads of double precision data:
10275 if (x86_sse_split_regs == true)
10276 movlpd mem, reg (gas syntax)
10277 else
10278 movsd mem, reg
10279
10280 Code generation for unaligned packed loads of single precision data
10281 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10282 if (x86_sse_unaligned_move_optimal)
10283 movups mem, reg
10284
10285 if (x86_sse_partial_reg_dependency == true)
10286 {
10287 xorps reg, reg
10288 movlps mem, reg
10289 movhps mem+8, reg
10290 }
10291 else
10292 {
10293 movlps mem, reg
10294 movhps mem+8, reg
10295 }
10296
10297 Code generation for unaligned packed loads of double precision data
10298 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10299 if (x86_sse_unaligned_move_optimal)
10300 movupd mem, reg
10301
10302 if (x86_sse_split_regs == true)
10303 {
10304 movlpd mem, reg
10305 movhpd mem+8, reg
10306 }
10307 else
10308 {
10309 movsd mem, reg
10310 movhpd mem+8, reg
10311 }
10312 */
10313
10314 void
10315 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10316 {
10317 rtx op0, op1, m;
10318
10319 op0 = operands[0];
10320 op1 = operands[1];
10321
10322 if (MEM_P (op1))
10323 {
10324 /* If we're optimizing for size, movups is the smallest. */
10325 if (optimize_size)
10326 {
10327 op0 = gen_lowpart (V4SFmode, op0);
10328 op1 = gen_lowpart (V4SFmode, op1);
10329 emit_insn (gen_sse_movups (op0, op1));
10330 return;
10331 }
10332
10333 /* ??? If we have typed data, then it would appear that using
10334 movdqu is the only way to get unaligned data loaded with
10335 integer type. */
10336 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10337 {
10338 op0 = gen_lowpart (V16QImode, op0);
10339 op1 = gen_lowpart (V16QImode, op1);
10340 emit_insn (gen_sse2_movdqu (op0, op1));
10341 return;
10342 }
10343
10344 if (TARGET_SSE2 && mode == V2DFmode)
10345 {
10346 rtx zero;
10347
10348 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10349 {
10350 op0 = gen_lowpart (V2DFmode, op0);
10351 op1 = gen_lowpart (V2DFmode, op1);
10352 emit_insn (gen_sse2_movupd (op0, op1));
10353 return;
10354 }
10355
10356 /* When SSE registers are split into halves, we can avoid
10357 writing to the top half twice. */
10358 if (TARGET_SSE_SPLIT_REGS)
10359 {
10360 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10361 zero = op0;
10362 }
10363 else
10364 {
10365 /* ??? Not sure about the best option for the Intel chips.
10366 The following would seem to satisfy; the register is
10367 entirely cleared, breaking the dependency chain. We
10368 then store to the upper half, with a dependency depth
10369 of one. A rumor has it that Intel recommends two movsd
10370 followed by an unpacklpd, but this is unconfirmed. And
10371 given that the dependency depth of the unpacklpd would
10372 still be one, I'm not sure why this would be better. */
10373 zero = CONST0_RTX (V2DFmode);
10374 }
10375
10376 m = adjust_address (op1, DFmode, 0);
10377 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10378 m = adjust_address (op1, DFmode, 8);
10379 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10380 }
10381 else
10382 {
10383 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10384 {
10385 op0 = gen_lowpart (V4SFmode, op0);
10386 op1 = gen_lowpart (V4SFmode, op1);
10387 emit_insn (gen_sse_movups (op0, op1));
10388 return;
10389 }
10390
10391 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10392 emit_move_insn (op0, CONST0_RTX (mode));
10393 else
10394 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10395
10396 if (mode != V4SFmode)
10397 op0 = gen_lowpart (V4SFmode, op0);
10398 m = adjust_address (op1, V2SFmode, 0);
10399 emit_insn (gen_sse_loadlps (op0, op0, m));
10400 m = adjust_address (op1, V2SFmode, 8);
10401 emit_insn (gen_sse_loadhps (op0, op0, m));
10402 }
10403 }
10404 else if (MEM_P (op0))
10405 {
10406 /* If we're optimizing for size, movups is the smallest. */
10407 if (optimize_size)
10408 {
10409 op0 = gen_lowpart (V4SFmode, op0);
10410 op1 = gen_lowpart (V4SFmode, op1);
10411 emit_insn (gen_sse_movups (op0, op1));
10412 return;
10413 }
10414
10415 /* ??? Similar to above, only less clear because of quote
10416 typeless stores unquote. */
10417 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10418 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10419 {
10420 op0 = gen_lowpart (V16QImode, op0);
10421 op1 = gen_lowpart (V16QImode, op1);
10422 emit_insn (gen_sse2_movdqu (op0, op1));
10423 return;
10424 }
10425
10426 if (TARGET_SSE2 && mode == V2DFmode)
10427 {
10428 m = adjust_address (op0, DFmode, 0);
10429 emit_insn (gen_sse2_storelpd (m, op1));
10430 m = adjust_address (op0, DFmode, 8);
10431 emit_insn (gen_sse2_storehpd (m, op1));
10432 }
10433 else
10434 {
10435 if (mode != V4SFmode)
10436 op1 = gen_lowpart (V4SFmode, op1);
10437 m = adjust_address (op0, V2SFmode, 0);
10438 emit_insn (gen_sse_storelps (m, op1));
10439 m = adjust_address (op0, V2SFmode, 8);
10440 emit_insn (gen_sse_storehps (m, op1));
10441 }
10442 }
10443 else
10444 gcc_unreachable ();
10445 }
10446
10447 /* Expand a push in MODE. This is some mode for which we do not support
10448 proper push instructions, at least from the registers that we expect
10449 the value to live in. */
10450
10451 void
10452 ix86_expand_push (enum machine_mode mode, rtx x)
10453 {
10454 rtx tmp;
10455
10456 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10457 GEN_INT (-GET_MODE_SIZE (mode)),
10458 stack_pointer_rtx, 1, OPTAB_DIRECT);
10459 if (tmp != stack_pointer_rtx)
10460 emit_move_insn (stack_pointer_rtx, tmp);
10461
10462 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10463 emit_move_insn (tmp, x);
10464 }
10465
10466 /* Helper function of ix86_fixup_binary_operands to canonicalize
10467 operand order. Returns true if the operands should be swapped. */
10468
10469 static bool
10470 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10471 rtx operands[])
10472 {
10473 rtx dst = operands[0];
10474 rtx src1 = operands[1];
10475 rtx src2 = operands[2];
10476
10477 /* If the operation is not commutative, we can't do anything. */
10478 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10479 return false;
10480
10481 /* Highest priority is that src1 should match dst. */
10482 if (rtx_equal_p (dst, src1))
10483 return false;
10484 if (rtx_equal_p (dst, src2))
10485 return true;
10486
10487 /* Next highest priority is that immediate constants come second. */
10488 if (immediate_operand (src2, mode))
10489 return false;
10490 if (immediate_operand (src1, mode))
10491 return true;
10492
10493 /* Lowest priority is that memory references should come second. */
10494 if (MEM_P (src2))
10495 return false;
10496 if (MEM_P (src1))
10497 return true;
10498
10499 return false;
10500 }
10501
10502
10503 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10504 destination to use for the operation. If different from the true
10505 destination in operands[0], a copy operation will be required. */
10506
10507 rtx
10508 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10509 rtx operands[])
10510 {
10511 rtx dst = operands[0];
10512 rtx src1 = operands[1];
10513 rtx src2 = operands[2];
10514
10515 /* Canonicalize operand order. */
10516 if (ix86_swap_binary_operands_p (code, mode, operands))
10517 {
10518 rtx temp = src1;
10519 src1 = src2;
10520 src2 = temp;
10521 }
10522
10523 /* Both source operands cannot be in memory. */
10524 if (MEM_P (src1) && MEM_P (src2))
10525 {
10526 /* Optimization: Only read from memory once. */
10527 if (rtx_equal_p (src1, src2))
10528 {
10529 src2 = force_reg (mode, src2);
10530 src1 = src2;
10531 }
10532 else
10533 src2 = force_reg (mode, src2);
10534 }
10535
10536 /* If the destination is memory, and we do not have matching source
10537 operands, do things in registers. */
10538 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10539 dst = gen_reg_rtx (mode);
10540
10541 /* Source 1 cannot be a constant. */
10542 if (CONSTANT_P (src1))
10543 src1 = force_reg (mode, src1);
10544
10545 /* Source 1 cannot be a non-matching memory. */
10546 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10547 src1 = force_reg (mode, src1);
10548
10549 operands[1] = src1;
10550 operands[2] = src2;
10551 return dst;
10552 }
10553
10554 /* Similarly, but assume that the destination has already been
10555 set up properly. */
10556
10557 void
10558 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10559 enum machine_mode mode, rtx operands[])
10560 {
10561 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10562 gcc_assert (dst == operands[0]);
10563 }
10564
10565 /* Attempt to expand a binary operator. Make the expansion closer to the
10566 actual machine than just general_operand, which would allow 3 separate
10567 memory references (one output, two input) in a single insn. */
10568
10569 void
10570 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10571 rtx operands[])
10572 {
10573 rtx src1, src2, dst, op, clob;
10574
10575 dst = ix86_fixup_binary_operands (code, mode, operands);
10576 src1 = operands[1];
10577 src2 = operands[2];
10578
10579 /* Emit the instruction. */
10580
10581 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10582 if (reload_in_progress)
10583 {
10584 /* Reload doesn't know about the flags register, and doesn't know that
10585 it doesn't want to clobber it. We can only do this with PLUS. */
10586 gcc_assert (code == PLUS);
10587 emit_insn (op);
10588 }
10589 else
10590 {
10591 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10592 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10593 }
10594
10595 /* Fix up the destination if needed. */
10596 if (dst != operands[0])
10597 emit_move_insn (operands[0], dst);
10598 }
10599
10600 /* Return TRUE or FALSE depending on whether the binary operator meets the
10601 appropriate constraints. */
10602
10603 int
10604 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10605 rtx operands[3])
10606 {
10607 rtx dst = operands[0];
10608 rtx src1 = operands[1];
10609 rtx src2 = operands[2];
10610
10611 /* Both source operands cannot be in memory. */
10612 if (MEM_P (src1) && MEM_P (src2))
10613 return 0;
10614
10615 /* Canonicalize operand order for commutative operators. */
10616 if (ix86_swap_binary_operands_p (code, mode, operands))
10617 {
10618 rtx temp = src1;
10619 src1 = src2;
10620 src2 = temp;
10621 }
10622
10623 /* If the destination is memory, we must have a matching source operand. */
10624 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10625 return 0;
10626
10627 /* Source 1 cannot be a constant. */
10628 if (CONSTANT_P (src1))
10629 return 0;
10630
10631 /* Source 1 cannot be a non-matching memory. */
10632 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10633 return 0;
10634
10635 return 1;
10636 }
10637
10638 /* Attempt to expand a unary operator. Make the expansion closer to the
10639 actual machine than just general_operand, which would allow 2 separate
10640 memory references (one output, one input) in a single insn. */
10641
10642 void
10643 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10644 rtx operands[])
10645 {
10646 int matching_memory;
10647 rtx src, dst, op, clob;
10648
10649 dst = operands[0];
10650 src = operands[1];
10651
10652 /* If the destination is memory, and we do not have matching source
10653 operands, do things in registers. */
10654 matching_memory = 0;
10655 if (MEM_P (dst))
10656 {
10657 if (rtx_equal_p (dst, src))
10658 matching_memory = 1;
10659 else
10660 dst = gen_reg_rtx (mode);
10661 }
10662
10663 /* When source operand is memory, destination must match. */
10664 if (MEM_P (src) && !matching_memory)
10665 src = force_reg (mode, src);
10666
10667 /* Emit the instruction. */
10668
10669 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10670 if (reload_in_progress || code == NOT)
10671 {
10672 /* Reload doesn't know about the flags register, and doesn't know that
10673 it doesn't want to clobber it. */
10674 gcc_assert (code == NOT);
10675 emit_insn (op);
10676 }
10677 else
10678 {
10679 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10680 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10681 }
10682
10683 /* Fix up the destination if needed. */
10684 if (dst != operands[0])
10685 emit_move_insn (operands[0], dst);
10686 }
10687
10688 /* Return TRUE or FALSE depending on whether the unary operator meets the
10689 appropriate constraints. */
10690
10691 int
10692 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10693 enum machine_mode mode ATTRIBUTE_UNUSED,
10694 rtx operands[2] ATTRIBUTE_UNUSED)
10695 {
10696 /* If one of the operands is memory, source and destination must match. */
10697 if ((MEM_P (operands[0])
10698 || MEM_P (operands[1]))
10699 && ! rtx_equal_p (operands[0], operands[1]))
10700 return FALSE;
10701 return TRUE;
10702 }
10703
10704 /* Post-reload splitter for converting an SF or DFmode value in an
10705 SSE register into an unsigned SImode. */
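/* Roughly: the packed cvttps2dq/cvttpd2dq conversions only cover the
   signed range, so when the input is >= 2**31 we subtract 2**31 before
   converting (LARGE becomes a lane mask for that case, ZERO_OR_TWO31 the
   amount actually subtracted) and xor the sign bit back into the integer
   result afterwards. */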
10706
10707 void
10708 ix86_split_convert_uns_si_sse (rtx operands[])
10709 {
10710 enum machine_mode vecmode;
10711 rtx value, large, zero_or_two31, input, two31, x;
10712
10713 large = operands[1];
10714 zero_or_two31 = operands[2];
10715 input = operands[3];
10716 two31 = operands[4];
10717 vecmode = GET_MODE (large);
10718 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10719
10720 /* Load up the value into the low element. We must ensure that the other
10721 elements are valid floats -- zero is the easiest such value. */
10722 if (MEM_P (input))
10723 {
10724 if (vecmode == V4SFmode)
10725 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10726 else
10727 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10728 }
10729 else
10730 {
10731 input = gen_rtx_REG (vecmode, REGNO (input));
10732 emit_move_insn (value, CONST0_RTX (vecmode));
10733 if (vecmode == V4SFmode)
10734 emit_insn (gen_sse_movss (value, value, input));
10735 else
10736 emit_insn (gen_sse2_movsd (value, value, input));
10737 }
10738
10739 emit_move_insn (large, two31);
10740 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10741
10742 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10743 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10744
10745 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10746 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10747
10748 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10749 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10750
10751 large = gen_rtx_REG (V4SImode, REGNO (large));
10752 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10753
10754 x = gen_rtx_REG (V4SImode, REGNO (value));
10755 if (vecmode == V4SFmode)
10756 emit_insn (gen_sse2_cvttps2dq (x, value));
10757 else
10758 emit_insn (gen_sse2_cvttpd2dq (x, value));
10759 value = x;
10760
10761 emit_insn (gen_xorv4si3 (value, value, large));
10762 }
10763
10764 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10765 Expects the 64-bit DImode to be supplied in a pair of integral
10766 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10767 -mfpmath=sse, !optimize_size only. */
10768
10769 void
10770 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10771 {
10772 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10773 rtx int_xmm, fp_xmm;
10774 rtx biases, exponents;
10775 rtx x;
10776
10777 int_xmm = gen_reg_rtx (V4SImode);
10778 if (TARGET_INTER_UNIT_MOVES)
10779 emit_insn (gen_movdi_to_sse (int_xmm, input));
10780 else if (TARGET_SSE_SPLIT_REGS)
10781 {
10782 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10783 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10784 }
10785 else
10786 {
10787 x = gen_reg_rtx (V2DImode);
10788 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10789 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10790 }
10791
10792 x = gen_rtx_CONST_VECTOR (V4SImode,
10793 gen_rtvec (4, GEN_INT (0x43300000UL),
10794 GEN_INT (0x45300000UL),
10795 const0_rtx, const0_rtx));
10796 exponents = validize_mem (force_const_mem (V4SImode, x));
10797
10798 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10799 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10800
10801 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10802 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10803 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10804 (0x1.0p84 + double(fp_value_hi_xmm)).
10805 Note these exponents differ by 32. */
10806
10807 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10808
10809 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10810 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10811 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10812 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10813 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10814 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10815 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10816 biases = validize_mem (force_const_mem (V2DFmode, biases));
10817 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10818
10819 /* Add the upper and lower DFmode values together. */
10820 if (TARGET_SSE3)
10821 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10822 else
10823 {
10824 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10825 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10826 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10827 }
10828
10829 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10830 }
10831
10832 /* Convert an unsigned SImode value into a DFmode. Only currently used
10833 for SSE, but applicable anywhere. */
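/* The idea: INPUT - 2**31, reinterpreted as signed, always fits in
   SImode, and DFmode represents every 32-bit integer exactly, so
   converting that value and then adding 2**31.0 recovers (double) INPUT
   without any rounding. */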
10834
10835 void
10836 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10837 {
10838 REAL_VALUE_TYPE TWO31r;
10839 rtx x, fp;
10840
10841 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10842 NULL, 1, OPTAB_DIRECT);
10843
10844 fp = gen_reg_rtx (DFmode);
10845 emit_insn (gen_floatsidf2 (fp, x));
10846
10847 real_ldexp (&TWO31r, &dconst1, 31);
10848 x = const_double_from_real_value (TWO31r, DFmode);
10849
10850 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10851 if (x != target)
10852 emit_move_insn (target, x);
10853 }
10854
10855 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10856 32-bit mode; otherwise we have a direct convert instruction. */
10857
10858 void
10859 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10860 {
10861 REAL_VALUE_TYPE TWO32r;
10862 rtx fp_lo, fp_hi, x;
10863
10864 fp_lo = gen_reg_rtx (DFmode);
10865 fp_hi = gen_reg_rtx (DFmode);
10866
10867 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10868
10869 real_ldexp (&TWO32r, &dconst1, 32);
10870 x = const_double_from_real_value (TWO32r, DFmode);
10871 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10872
10873 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10874
10875 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10876 0, OPTAB_DIRECT);
10877 if (x != target)
10878 emit_move_insn (target, x);
10879 }
10880
10881 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10882 For x86_32, -mfpmath=sse, !optimize_size only. */
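/* Presumably the split into 16-bit halves is so that each half is a small
   non-negative value that the signed SImode->SFmode conversion handles
   exactly; the high half is then scaled by 2**16 and the two are added. */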
10883 void
10884 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10885 {
10886 REAL_VALUE_TYPE ONE16r;
10887 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10888
10889 real_ldexp (&ONE16r, &dconst1, 16);
10890 x = const_double_from_real_value (ONE16r, SFmode);
10891 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10892 NULL, 0, OPTAB_DIRECT);
10893 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10894 NULL, 0, OPTAB_DIRECT);
10895 fp_hi = gen_reg_rtx (SFmode);
10896 fp_lo = gen_reg_rtx (SFmode);
10897 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10898 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10899 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10900 0, OPTAB_DIRECT);
10901 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10902 0, OPTAB_DIRECT);
10903 if (!rtx_equal_p (target, fp_hi))
10904 emit_move_insn (target, fp_hi);
10905 }
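
/* Editor's illustrative sketch (not part of GCC): the 16-bit split above
   corresponds to

     static float uns32_to_float (uint32_t u)
     {
       float hi = (float) (u >> 16);     // exact, value < 2^16
       float lo = (float) (u & 0xffff);  // exact, value < 2^16
       return hi * 0x1.0p16f + lo;       // multiply exact; single rounding at the add
     }

   which avoids the need for an unsigned-to-float conversion instruction.  */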
10906
10907 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10908 then replicate the value for all elements of the vector
10909 register. */
10910
10911 rtx
10912 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10913 {
10914 rtvec v;
10915 switch (mode)
10916 {
10917 case SImode:
10918 gcc_assert (vect);
10919 v = gen_rtvec (4, value, value, value, value);
10920 return gen_rtx_CONST_VECTOR (V4SImode, v);
10921
10922 case DImode:
10923 gcc_assert (vect);
10924 v = gen_rtvec (2, value, value);
10925 return gen_rtx_CONST_VECTOR (V2DImode, v);
10926
10927 case SFmode:
10928 if (vect)
10929 v = gen_rtvec (4, value, value, value, value);
10930 else
10931 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10932 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10933 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10934
10935 case DFmode:
10936 if (vect)
10937 v = gen_rtvec (2, value, value);
10938 else
10939 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10940 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10941
10942 default:
10943 gcc_unreachable ();
10944 }
10945 }
10946
10947 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10948 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10949 for an SSE register. If VECT is true, then replicate the mask for
10950 all elements of the vector register. If INVERT is true, then create
10951 a mask excluding the sign bit. */
10952
10953 rtx
10954 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10955 {
10956 enum machine_mode vec_mode, imode;
10957 HOST_WIDE_INT hi, lo;
10958 int shift = 63;
10959 rtx v;
10960 rtx mask;
10961
10962 /* Find the sign bit, sign extended to 2*HWI. */
10963 switch (mode)
10964 {
10965 case SImode:
10966 case SFmode:
10967 imode = SImode;
10968 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10969 lo = 0x80000000, hi = lo < 0;
10970 break;
10971
10972 case DImode:
10973 case DFmode:
10974 imode = DImode;
10975 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10976 if (HOST_BITS_PER_WIDE_INT >= 64)
10977 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10978 else
10979 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10980 break;
10981
10982 case TImode:
10983 case TFmode:
10984 imode = TImode;
10985 vec_mode = VOIDmode;
10986 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10987 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10988 break;
10989
10990 default:
10991 gcc_unreachable ();
10992 }
10993
10994 if (invert)
10995 lo = ~lo, hi = ~hi;
10996
10997 /* Force this value into the low part of a fp vector constant. */
10998 mask = immed_double_const (lo, hi, imode);
10999 mask = gen_lowpart (mode, mask);
11000
11001 if (vec_mode == VOIDmode)
11002 return force_reg (mode, mask);
11003
11004 v = ix86_build_const_vector (mode, vect, mask);
11005 return force_reg (vec_mode, v);
11006 }
11007
11008 /* Generate code for floating point ABS or NEG. */
11009
11010 void
11011 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11012 rtx operands[])
11013 {
11014 rtx mask, set, use, clob, dst, src;
11015 bool matching_memory;
11016 bool use_sse = false;
11017 bool vector_mode = VECTOR_MODE_P (mode);
11018 enum machine_mode elt_mode = mode;
11019
11020 if (vector_mode)
11021 {
11022 elt_mode = GET_MODE_INNER (mode);
11023 use_sse = true;
11024 }
11025 else if (mode == TFmode)
11026 use_sse = true;
11027 else if (TARGET_SSE_MATH)
11028 use_sse = SSE_FLOAT_MODE_P (mode);
11029
11030 /* NEG and ABS performed with SSE use bitwise mask operations.
11031 Create the appropriate mask now. */
11032 if (use_sse)
11033 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11034 else
11035 mask = NULL_RTX;
11036
11037 dst = operands[0];
11038 src = operands[1];
11039
11040 /* If the destination is memory, and we don't have matching source
11041 operands or we're using the x87, do things in registers. */
11042 matching_memory = false;
11043 if (MEM_P (dst))
11044 {
11045 if (use_sse && rtx_equal_p (dst, src))
11046 matching_memory = true;
11047 else
11048 dst = gen_reg_rtx (mode);
11049 }
11050 if (MEM_P (src) && !matching_memory)
11051 src = force_reg (mode, src);
11052
11053 if (vector_mode)
11054 {
11055 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11056 set = gen_rtx_SET (VOIDmode, dst, set);
11057 emit_insn (set);
11058 }
11059 else
11060 {
11061 set = gen_rtx_fmt_e (code, mode, src);
11062 set = gen_rtx_SET (VOIDmode, dst, set);
11063 if (mask)
11064 {
11065 use = gen_rtx_USE (VOIDmode, mask);
11066 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11067 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11068 gen_rtvec (3, set, use, clob)));
11069 }
11070 else
11071 emit_insn (set);
11072 }
11073
11074 if (dst != operands[0])
11075 emit_move_insn (operands[0], dst);
11076 }
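
/* Editor's note, an illustrative sketch of the mask trick used above: with
   the IEEE sign bit isolated in a mask, negation and absolute value become
   pure bit operations, e.g. for a 32-bit float x

     neg(x) = x XOR 0x80000000   (flip the sign bit)
     abs(x) = x AND 0x7fffffff   (clear the sign bit, i.e. the inverted mask)

   which is what the XOR/AND patterns emitted above turn into for SSE.  */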
11077
11078 /* Expand a copysign operation. Special case operand 0 being a constant. */
11079
11080 void
11081 ix86_expand_copysign (rtx operands[])
11082 {
11083 enum machine_mode mode, vmode;
11084 rtx dest, op0, op1, mask, nmask;
11085
11086 dest = operands[0];
11087 op0 = operands[1];
11088 op1 = operands[2];
11089
11090 mode = GET_MODE (dest);
11091 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11092
11093 if (GET_CODE (op0) == CONST_DOUBLE)
11094 {
11095 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11096
11097 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11098 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11099
11100 if (mode == SFmode || mode == DFmode)
11101 {
11102 if (op0 == CONST0_RTX (mode))
11103 op0 = CONST0_RTX (vmode);
11104 else
11105 {
11106 rtvec v;
11107
11108 if (mode == SFmode)
11109 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11110 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11111 else
11112 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11113 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11114 }
11115 }
11116
11117 mask = ix86_build_signbit_mask (mode, 0, 0);
11118
11119 if (mode == SFmode)
11120 copysign_insn = gen_copysignsf3_const;
11121 else if (mode == DFmode)
11122 copysign_insn = gen_copysigndf3_const;
11123 else
11124 copysign_insn = gen_copysigntf3_const;
11125
11126 emit_insn (copysign_insn (dest, op0, op1, mask));
11127 }
11128 else
11129 {
11130 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11131
11132 nmask = ix86_build_signbit_mask (mode, 0, 1);
11133 mask = ix86_build_signbit_mask (mode, 0, 0);
11134
11135 if (mode == SFmode)
11136 copysign_insn = gen_copysignsf3_var;
11137 else if (mode == DFmode)
11138 copysign_insn = gen_copysigndf3_var;
11139 else
11140 copysign_insn = gen_copysigntf3_var;
11141
11142 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11143 }
11144 }
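
/* Editor's note, an illustrative sketch of the bitwise copysign the
   splitters below implement: with S the sign-bit mask,

     copysign(x, y) = (x AND (NOT S)) OR (y AND S)

   i.e. the magnitude bits of X combined with the sign bit of Y.  In the
   constant case the sign of OP0 has already been cleared above, so only
   the (op1 AND S) and the final OR remain.  */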
11145
11146 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11147 be a constant, and so has already been expanded into a vector constant. */
11148
11149 void
11150 ix86_split_copysign_const (rtx operands[])
11151 {
11152 enum machine_mode mode, vmode;
11153 rtx dest, op0, op1, mask, x;
11154
11155 dest = operands[0];
11156 op0 = operands[1];
11157 op1 = operands[2];
11158 mask = operands[3];
11159
11160 mode = GET_MODE (dest);
11161 vmode = GET_MODE (mask);
11162
11163 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11164 x = gen_rtx_AND (vmode, dest, mask);
11165 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11166
11167 if (op0 != CONST0_RTX (vmode))
11168 {
11169 x = gen_rtx_IOR (vmode, dest, op0);
11170 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11171 }
11172 }
11173
11174 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11175 so we have to do two masks. */
11176
11177 void
11178 ix86_split_copysign_var (rtx operands[])
11179 {
11180 enum machine_mode mode, vmode;
11181 rtx dest, scratch, op0, op1, mask, nmask, x;
11182
11183 dest = operands[0];
11184 scratch = operands[1];
11185 op0 = operands[2];
11186 op1 = operands[3];
11187 nmask = operands[4];
11188 mask = operands[5];
11189
11190 mode = GET_MODE (dest);
11191 vmode = GET_MODE (mask);
11192
11193 if (rtx_equal_p (op0, op1))
11194 {
11195 /* Shouldn't happen often (it's useless, obviously), but when it does
11196 we'd generate incorrect code if we continue below. */
11197 emit_move_insn (dest, op0);
11198 return;
11199 }
11200
11201 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11202 {
11203 gcc_assert (REGNO (op1) == REGNO (scratch));
11204
11205 x = gen_rtx_AND (vmode, scratch, mask);
11206 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11207
11208 dest = mask;
11209 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11210 x = gen_rtx_NOT (vmode, dest);
11211 x = gen_rtx_AND (vmode, x, op0);
11212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11213 }
11214 else
11215 {
11216 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11217 {
11218 x = gen_rtx_AND (vmode, scratch, mask);
11219 }
11220 else /* alternative 2,4 */
11221 {
11222 gcc_assert (REGNO (mask) == REGNO (scratch));
11223 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11224 x = gen_rtx_AND (vmode, scratch, op1);
11225 }
11226 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11227
11228 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11229 {
11230 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11231 x = gen_rtx_AND (vmode, dest, nmask);
11232 }
11233 else /* alternative 3,4 */
11234 {
11235 gcc_assert (REGNO (nmask) == REGNO (dest));
11236 dest = nmask;
11237 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11238 x = gen_rtx_AND (vmode, dest, op0);
11239 }
11240 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11241 }
11242
11243 x = gen_rtx_IOR (vmode, dest, scratch);
11244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11245 }
11246
11247 /* Return TRUE or FALSE depending on whether the first SET in INSN
11248 has source and destination with matching CC modes, and whether the
11249 CC mode is at least as constrained as REQ_MODE. */
11250
11251 int
11252 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11253 {
11254 rtx set;
11255 enum machine_mode set_mode;
11256
11257 set = PATTERN (insn);
11258 if (GET_CODE (set) == PARALLEL)
11259 set = XVECEXP (set, 0, 0);
11260 gcc_assert (GET_CODE (set) == SET);
11261 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11262
11263 set_mode = GET_MODE (SET_DEST (set));
11264 switch (set_mode)
11265 {
11266 case CCNOmode:
11267 if (req_mode != CCNOmode
11268 && (req_mode != CCmode
11269 || XEXP (SET_SRC (set), 1) != const0_rtx))
11270 return 0;
11271 break;
11272 case CCmode:
11273 if (req_mode == CCGCmode)
11274 return 0;
11275 /* FALLTHRU */
11276 case CCGCmode:
11277 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11278 return 0;
11279 /* FALLTHRU */
11280 case CCGOCmode:
11281 if (req_mode == CCZmode)
11282 return 0;
11283 /* FALLTHRU */
11284 case CCZmode:
11285 break;
11286
11287 default:
11288 gcc_unreachable ();
11289 }
11290
11291 return (GET_MODE (SET_SRC (set)) == set_mode);
11292 }
11293
11294 /* Generate insn patterns to do an integer compare of OPERANDS. */
11295
11296 static rtx
11297 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11298 {
11299 enum machine_mode cmpmode;
11300 rtx tmp, flags;
11301
11302 cmpmode = SELECT_CC_MODE (code, op0, op1);
11303 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11304
11305 /* This is very simple, but making the interface the same as in the
11306 FP case makes the rest of the code easier. */
11307 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11308 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11309
11310 /* Return the test that should be put into the flags user, i.e.
11311 the bcc, scc, or cmov instruction. */
11312 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11313 }
11314
11315 /* Figure out whether to use ordered or unordered fp comparisons.
11316 Return the appropriate mode to use. */
11317
11318 enum machine_mode
11319 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11320 {
11321 /* ??? In order to make all comparisons reversible, we do all comparisons
11322 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11323 all forms of trapping and nontrapping comparisons, we can make inequality
11324 comparisons trapping again, since it results in better code when using
11325 FCOM based compares. */
11326 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11327 }
11328
11329 enum machine_mode
11330 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11331 {
11332 enum machine_mode mode = GET_MODE (op0);
11333
11334 if (SCALAR_FLOAT_MODE_P (mode))
11335 {
11336 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11337 return ix86_fp_compare_mode (code);
11338 }
11339
11340 switch (code)
11341 {
11342 /* Only zero flag is needed. */
11343 case EQ: /* ZF=0 */
11344 case NE: /* ZF!=0 */
11345 return CCZmode;
11346 /* Codes needing carry flag. */
11347 case GEU: /* CF=0 */
11348 case LTU: /* CF=1 */
11349 /* Detect overflow checks. They need just the carry flag. */
11350 if (GET_CODE (op0) == PLUS
11351 && rtx_equal_p (op1, XEXP (op0, 0)))
11352 return CCCmode;
11353 else
11354 return CCmode;
11355 case GTU: /* CF=0 & ZF=0 */
11356 case LEU: /* CF=1 | ZF=1 */
11357 /* Detect overflow checks. They need just the carry flag. */
11358 if (GET_CODE (op0) == MINUS
11359 && rtx_equal_p (op1, XEXP (op0, 0)))
11360 return CCCmode;
11361 else
11362 return CCmode;
11363 /* Codes possibly doable with only the sign flag when
11364 comparing against zero. */
11365 case GE: /* SF=OF or SF=0 */
11366 case LT: /* SF<>OF or SF=1 */
11367 if (op1 == const0_rtx)
11368 return CCGOCmode;
11369 else
11370 /* For other cases Carry flag is not required. */
11371 return CCGCmode;
11372 /* Codes doable with only the sign flag when comparing
11373 against zero, but we lack a jump instruction for that,
11374 so we need to use relational tests against overflow,
11375 which thus needs to be zero. */
11376 case GT: /* ZF=0 & SF=OF */
11377 case LE: /* ZF=1 | SF<>OF */
11378 if (op1 == const0_rtx)
11379 return CCNOmode;
11380 else
11381 return CCGCmode;
11382 /* The strcmp pattern does a (use flags), and combine may ask us for
11383 the proper mode. */
11384 case USE:
11385 return CCmode;
11386 default:
11387 gcc_unreachable ();
11388 }
11389 }
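
/* Editor's illustrative example of the overflow check recognized above
   (a PLUS whose first operand equals the other compare operand): for
   unsigned a and b the C test

     if ((a + b) < a)  ...     -- true exactly when the addition wrapped

   needs only the carry flag, hence CCCmode; the analogous MINUS form
   covers unsigned borrow checks for GTU/LEU.  */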
11390
11391 /* Return the fixed registers used for condition codes. */
11392
11393 static bool
11394 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11395 {
11396 *p1 = FLAGS_REG;
11397 *p2 = FPSR_REG;
11398 return true;
11399 }
11400
11401 /* If two condition code modes are compatible, return a condition code
11402 mode which is compatible with both. Otherwise, return
11403 VOIDmode. */
11404
11405 static enum machine_mode
11406 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11407 {
11408 if (m1 == m2)
11409 return m1;
11410
11411 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11412 return VOIDmode;
11413
11414 if ((m1 == CCGCmode && m2 == CCGOCmode)
11415 || (m1 == CCGOCmode && m2 == CCGCmode))
11416 return CCGCmode;
11417
11418 switch (m1)
11419 {
11420 default:
11421 gcc_unreachable ();
11422
11423 case CCmode:
11424 case CCGCmode:
11425 case CCGOCmode:
11426 case CCNOmode:
11427 case CCAmode:
11428 case CCCmode:
11429 case CCOmode:
11430 case CCSmode:
11431 case CCZmode:
11432 switch (m2)
11433 {
11434 default:
11435 return VOIDmode;
11436
11437 case CCmode:
11438 case CCGCmode:
11439 case CCGOCmode:
11440 case CCNOmode:
11441 case CCAmode:
11442 case CCCmode:
11443 case CCOmode:
11444 case CCSmode:
11445 case CCZmode:
11446 return CCmode;
11447 }
11448
11449 case CCFPmode:
11450 case CCFPUmode:
11451 /* These are only compatible with themselves, which we already
11452 checked above. */
11453 return VOIDmode;
11454 }
11455 }
11456
11457 /* Split comparison code CODE into comparisons we can do using branch
11458 instructions. BYPASS_CODE is the comparison code for a branch that will
11459 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11460 is not required, its code is set to UNKNOWN.
11461 We never require more than two branches. */
11462
11463 void
11464 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11465 enum rtx_code *first_code,
11466 enum rtx_code *second_code)
11467 {
11468 *first_code = code;
11469 *bypass_code = UNKNOWN;
11470 *second_code = UNKNOWN;
11471
11472 /* The fcomi comparison sets flags as follows:
11473
11474 cmp ZF PF CF
11475 > 0 0 0
11476 < 0 0 1
11477 = 1 0 0
11478 un 1 1 1 */
11479
11480 switch (code)
11481 {
11482 case GT: /* GTU - CF=0 & ZF=0 */
11483 case GE: /* GEU - CF=0 */
11484 case ORDERED: /* PF=0 */
11485 case UNORDERED: /* PF=1 */
11486 case UNEQ: /* EQ - ZF=1 */
11487 case UNLT: /* LTU - CF=1 */
11488 case UNLE: /* LEU - CF=1 | ZF=1 */
11489 case LTGT: /* EQ - ZF=0 */
11490 break;
11491 case LT: /* LTU - CF=1 - fails on unordered */
11492 *first_code = UNLT;
11493 *bypass_code = UNORDERED;
11494 break;
11495 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11496 *first_code = UNLE;
11497 *bypass_code = UNORDERED;
11498 break;
11499 case EQ: /* EQ - ZF=1 - fails on unordered */
11500 *first_code = UNEQ;
11501 *bypass_code = UNORDERED;
11502 break;
11503 case NE: /* NE - ZF=0 - fails on unordered */
11504 *first_code = LTGT;
11505 *second_code = UNORDERED;
11506 break;
11507 case UNGE: /* GEU - CF=0 - fails on unordered */
11508 *first_code = GE;
11509 *second_code = UNORDERED;
11510 break;
11511 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11512 *first_code = GT;
11513 *second_code = UNORDERED;
11514 break;
11515 default:
11516 gcc_unreachable ();
11517 }
11518 if (!TARGET_IEEE_FP)
11519 {
11520 *second_code = UNKNOWN;
11521 *bypass_code = UNKNOWN;
11522 }
11523 }
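
/* Editor's illustrative example of the splitting above: with TARGET_IEEE_FP
   a LT comparison becomes

     first_code  = UNLT       -- CF=1 after fcomi, but also set when unordered
     bypass_code = UNORDERED  -- PF=1

   so the emitted sequence tests PF first and branches around the CF test,
   making a NaN operand take the false path rather than the LT path.  */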
11524
11525 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11526 All following functions use the number of instructions as a cost metric.
11527 In the future this should be tweaked to compute bytes for optimize_size and
11528 take into account the performance of various instructions on various CPUs. */
11529 static int
11530 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11531 {
11532 if (!TARGET_IEEE_FP)
11533 return 4;
11534 /* The cost of code output by ix86_expand_fp_compare. */
11535 switch (code)
11536 {
11537 case UNLE:
11538 case UNLT:
11539 case LTGT:
11540 case GT:
11541 case GE:
11542 case UNORDERED:
11543 case ORDERED:
11544 case UNEQ:
11545 return 4;
11546 break;
11547 case LT:
11548 case NE:
11549 case EQ:
11550 case UNGE:
11551 return 5;
11552 break;
11553 case LE:
11554 case UNGT:
11555 return 6;
11556 break;
11557 default:
11558 gcc_unreachable ();
11559 }
11560 }
11561
11562 /* Return cost of comparison done using fcomi operation.
11563 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11564 static int
11565 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11566 {
11567 enum rtx_code bypass_code, first_code, second_code;
11568 /* Return an arbitrarily high cost when the instruction is not supported -
11569 this prevents gcc from using it. */
11570 if (!TARGET_CMOVE)
11571 return 1024;
11572 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11573 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11574 }
11575
11576 /* Return cost of comparison done using sahf operation.
11577 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11578 static int
11579 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11580 {
11581 enum rtx_code bypass_code, first_code, second_code;
11582 /* Return an arbitrarily high cost when the instruction is not preferred -
11583 this keeps gcc from using it. */
11584 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11585 return 1024;
11586 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11587 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11588 }
11589
11590 /* Compute cost of the comparison done using any method.
11591 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11592 static int
11593 ix86_fp_comparison_cost (enum rtx_code code)
11594 {
11595 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11596 int min;
11597
11598 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11599 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11600
11601 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11602 if (min > sahf_cost)
11603 min = sahf_cost;
11604 if (min > fcomi_cost)
11605 min = fcomi_cost;
11606 return min;
11607 }
11608
11609 /* Return true if we should use an FCOMI instruction for this
11610 fp comparison. */
11611
11612 int
11613 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11614 {
11615 enum rtx_code swapped_code = swap_condition (code);
11616
11617 return ((ix86_fp_comparison_cost (code)
11618 == ix86_fp_comparison_fcomi_cost (code))
11619 || (ix86_fp_comparison_cost (swapped_code)
11620 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11621 }
11622
11623 /* Swap, force into registers, or otherwise massage the two operands
11624 to a fp comparison. The operands are updated in place; the new
11625 comparison code is returned. */
11626
11627 static enum rtx_code
11628 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11629 {
11630 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11631 rtx op0 = *pop0, op1 = *pop1;
11632 enum machine_mode op_mode = GET_MODE (op0);
11633 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11634
11635 /* All of the unordered compare instructions only work on registers.
11636 The same is true of the fcomi compare instructions. The XFmode
11637 compare instructions require registers except when comparing
11638 against zero or when converting operand 1 from fixed point to
11639 floating point. */
11640
11641 if (!is_sse
11642 && (fpcmp_mode == CCFPUmode
11643 || (op_mode == XFmode
11644 && ! (standard_80387_constant_p (op0) == 1
11645 || standard_80387_constant_p (op1) == 1)
11646 && GET_CODE (op1) != FLOAT)
11647 || ix86_use_fcomi_compare (code)))
11648 {
11649 op0 = force_reg (op_mode, op0);
11650 op1 = force_reg (op_mode, op1);
11651 }
11652 else
11653 {
11654 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11655 things around if they appear profitable, otherwise force op0
11656 into a register. */
11657
11658 if (standard_80387_constant_p (op0) == 0
11659 || (MEM_P (op0)
11660 && ! (standard_80387_constant_p (op1) == 0
11661 || MEM_P (op1))))
11662 {
11663 rtx tmp;
11664 tmp = op0, op0 = op1, op1 = tmp;
11665 code = swap_condition (code);
11666 }
11667
11668 if (!REG_P (op0))
11669 op0 = force_reg (op_mode, op0);
11670
11671 if (CONSTANT_P (op1))
11672 {
11673 int tmp = standard_80387_constant_p (op1);
11674 if (tmp == 0)
11675 op1 = validize_mem (force_const_mem (op_mode, op1));
11676 else if (tmp == 1)
11677 {
11678 if (TARGET_CMOVE)
11679 op1 = force_reg (op_mode, op1);
11680 }
11681 else
11682 op1 = force_reg (op_mode, op1);
11683 }
11684 }
11685
11686 /* Try to rearrange the comparison to make it cheaper. */
11687 if (ix86_fp_comparison_cost (code)
11688 > ix86_fp_comparison_cost (swap_condition (code))
11689 && (REG_P (op1) || can_create_pseudo_p ()))
11690 {
11691 rtx tmp;
11692 tmp = op0, op0 = op1, op1 = tmp;
11693 code = swap_condition (code);
11694 if (!REG_P (op0))
11695 op0 = force_reg (op_mode, op0);
11696 }
11697
11698 *pop0 = op0;
11699 *pop1 = op1;
11700 return code;
11701 }
11702
11703 /* Convert the comparison codes we use to represent FP comparisons to the
11704 integer code that will result in a proper branch. Return UNKNOWN if no
11705 such code is available. */
11706
11707 enum rtx_code
11708 ix86_fp_compare_code_to_integer (enum rtx_code code)
11709 {
11710 switch (code)
11711 {
11712 case GT:
11713 return GTU;
11714 case GE:
11715 return GEU;
11716 case ORDERED:
11717 case UNORDERED:
11718 return code;
11719 break;
11720 case UNEQ:
11721 return EQ;
11722 break;
11723 case UNLT:
11724 return LTU;
11725 break;
11726 case UNLE:
11727 return LEU;
11728 break;
11729 case LTGT:
11730 return NE;
11731 break;
11732 default:
11733 return UNKNOWN;
11734 }
11735 }
11736
11737 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11738
11739 static rtx
11740 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11741 rtx *second_test, rtx *bypass_test)
11742 {
11743 enum machine_mode fpcmp_mode, intcmp_mode;
11744 rtx tmp, tmp2;
11745 int cost = ix86_fp_comparison_cost (code);
11746 enum rtx_code bypass_code, first_code, second_code;
11747
11748 fpcmp_mode = ix86_fp_compare_mode (code);
11749 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11750
11751 if (second_test)
11752 *second_test = NULL_RTX;
11753 if (bypass_test)
11754 *bypass_test = NULL_RTX;
11755
11756 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11757
11758 /* Do fcomi/sahf based test when profitable. */
11759 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11760 && (bypass_code == UNKNOWN || bypass_test)
11761 && (second_code == UNKNOWN || second_test))
11762 {
11763 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11764 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11765 tmp);
11766 if (TARGET_CMOVE)
11767 emit_insn (tmp);
11768 else
11769 {
11770 gcc_assert (TARGET_SAHF);
11771
11772 if (!scratch)
11773 scratch = gen_reg_rtx (HImode);
11774 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11775
11776 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11777 }
11778
11779 /* The FP codes work out to act like unsigned. */
11780 intcmp_mode = fpcmp_mode;
11781 code = first_code;
11782 if (bypass_code != UNKNOWN)
11783 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11784 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11785 const0_rtx);
11786 if (second_code != UNKNOWN)
11787 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11788 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11789 const0_rtx);
11790 }
11791 else
11792 {
11793 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11794 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11795 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11796 if (!scratch)
11797 scratch = gen_reg_rtx (HImode);
11798 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11799
11800 /* In the unordered case, we have to check C2 for NaN's, which
11801 doesn't happen to work out to anything nice combination-wise.
11802 So do some bit twiddling on the value we've got in AH to come
11803 up with an appropriate set of condition codes. */
11804
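/* Editor's note: after fnstsw the FPU condition bits sit in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40 (they become CF, PF and ZF after
   sahf), so the masks below mean: 0x45 = C3|C2|C0, 0x44 = C3|C2,
   0x05 = C2|C0, 0x40 = C3, and 0x04 = C2 (the "unordered" bit).  */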
11805 intcmp_mode = CCNOmode;
11806 switch (code)
11807 {
11808 case GT:
11809 case UNGT:
11810 if (code == GT || !TARGET_IEEE_FP)
11811 {
11812 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11813 code = EQ;
11814 }
11815 else
11816 {
11817 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11818 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11819 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11820 intcmp_mode = CCmode;
11821 code = GEU;
11822 }
11823 break;
11824 case LT:
11825 case UNLT:
11826 if (code == LT && TARGET_IEEE_FP)
11827 {
11828 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11829 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11830 intcmp_mode = CCmode;
11831 code = EQ;
11832 }
11833 else
11834 {
11835 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11836 code = NE;
11837 }
11838 break;
11839 case GE:
11840 case UNGE:
11841 if (code == GE || !TARGET_IEEE_FP)
11842 {
11843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11844 code = EQ;
11845 }
11846 else
11847 {
11848 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11849 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11850 GEN_INT (0x01)));
11851 code = NE;
11852 }
11853 break;
11854 case LE:
11855 case UNLE:
11856 if (code == LE && TARGET_IEEE_FP)
11857 {
11858 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11859 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11860 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11861 intcmp_mode = CCmode;
11862 code = LTU;
11863 }
11864 else
11865 {
11866 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11867 code = NE;
11868 }
11869 break;
11870 case EQ:
11871 case UNEQ:
11872 if (code == EQ && TARGET_IEEE_FP)
11873 {
11874 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11875 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11876 intcmp_mode = CCmode;
11877 code = EQ;
11878 }
11879 else
11880 {
11881 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11882 code = NE;
11883 break;
11884 }
11885 break;
11886 case NE:
11887 case LTGT:
11888 if (code == NE && TARGET_IEEE_FP)
11889 {
11890 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11891 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11892 GEN_INT (0x40)));
11893 code = NE;
11894 }
11895 else
11896 {
11897 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11898 code = EQ;
11899 }
11900 break;
11901
11902 case UNORDERED:
11903 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11904 code = NE;
11905 break;
11906 case ORDERED:
11907 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11908 code = EQ;
11909 break;
11910
11911 default:
11912 gcc_unreachable ();
11913 }
11914 }
11915
11916 /* Return the test that should be put into the flags user, i.e.
11917 the bcc, scc, or cmov instruction. */
11918 return gen_rtx_fmt_ee (code, VOIDmode,
11919 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11920 const0_rtx);
11921 }
11922
11923 rtx
11924 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11925 {
11926 rtx op0, op1, ret;
11927 op0 = ix86_compare_op0;
11928 op1 = ix86_compare_op1;
11929
11930 if (second_test)
11931 *second_test = NULL_RTX;
11932 if (bypass_test)
11933 *bypass_test = NULL_RTX;
11934
11935 if (ix86_compare_emitted)
11936 {
11937 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11938 ix86_compare_emitted = NULL_RTX;
11939 }
11940 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11941 {
11942 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11943 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11944 second_test, bypass_test);
11945 }
11946 else
11947 ret = ix86_expand_int_compare (code, op0, op1);
11948
11949 return ret;
11950 }
11951
11952 /* Return true if the CODE will result in a nontrivial jump sequence. */
11953 bool
11954 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11955 {
11956 enum rtx_code bypass_code, first_code, second_code;
11957 if (!TARGET_CMOVE)
11958 return true;
11959 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11960 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11961 }
11962
11963 void
11964 ix86_expand_branch (enum rtx_code code, rtx label)
11965 {
11966 rtx tmp;
11967
11968 /* If we have emitted a compare insn, go straight to simple.
11969 ix86_expand_compare won't emit anything if ix86_compare_emitted
11970 is non-NULL. */
11971 if (ix86_compare_emitted)
11972 goto simple;
11973
11974 switch (GET_MODE (ix86_compare_op0))
11975 {
11976 case QImode:
11977 case HImode:
11978 case SImode:
11979 simple:
11980 tmp = ix86_expand_compare (code, NULL, NULL);
11981 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11982 gen_rtx_LABEL_REF (VOIDmode, label),
11983 pc_rtx);
11984 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11985 return;
11986
11987 case SFmode:
11988 case DFmode:
11989 case XFmode:
11990 {
11991 rtvec vec;
11992 int use_fcomi;
11993 enum rtx_code bypass_code, first_code, second_code;
11994
11995 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11996 &ix86_compare_op1);
11997
11998 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11999
12000 /* Check whether we will use the natural sequence with one jump. If
12001 so, we can expand the jump early. Otherwise delay expansion by
12002 creating a compound insn so as not to confuse the optimizers. */
12003 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12004 {
12005 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12006 gen_rtx_LABEL_REF (VOIDmode, label),
12007 pc_rtx, NULL_RTX, NULL_RTX);
12008 }
12009 else
12010 {
12011 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12012 ix86_compare_op0, ix86_compare_op1);
12013 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12014 gen_rtx_LABEL_REF (VOIDmode, label),
12015 pc_rtx);
12016 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12017
12018 use_fcomi = ix86_use_fcomi_compare (code);
12019 vec = rtvec_alloc (3 + !use_fcomi);
12020 RTVEC_ELT (vec, 0) = tmp;
12021 RTVEC_ELT (vec, 1)
12022 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12023 RTVEC_ELT (vec, 2)
12024 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12025 if (! use_fcomi)
12026 RTVEC_ELT (vec, 3)
12027 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12028
12029 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12030 }
12031 return;
12032 }
12033
12034 case DImode:
12035 if (TARGET_64BIT)
12036 goto simple;
12037 case TImode:
12038 /* Expand DImode branch into multiple compare+branch. */
12039 {
12040 rtx lo[2], hi[2], label2;
12041 enum rtx_code code1, code2, code3;
12042 enum machine_mode submode;
12043
12044 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12045 {
12046 tmp = ix86_compare_op0;
12047 ix86_compare_op0 = ix86_compare_op1;
12048 ix86_compare_op1 = tmp;
12049 code = swap_condition (code);
12050 }
12051 if (GET_MODE (ix86_compare_op0) == DImode)
12052 {
12053 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12054 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12055 submode = SImode;
12056 }
12057 else
12058 {
12059 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12060 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12061 submode = DImode;
12062 }
12063
12064 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12065 avoid two branches. This costs one extra insn, so disable when
12066 optimizing for size. */
12067
12068 if ((code == EQ || code == NE)
12069 && (!optimize_size
12070 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12071 {
12072 rtx xor0, xor1;
12073
12074 xor1 = hi[0];
12075 if (hi[1] != const0_rtx)
12076 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12077 NULL_RTX, 0, OPTAB_WIDEN);
12078
12079 xor0 = lo[0];
12080 if (lo[1] != const0_rtx)
12081 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12082 NULL_RTX, 0, OPTAB_WIDEN);
12083
12084 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12085 NULL_RTX, 0, OPTAB_WIDEN);
12086
12087 ix86_compare_op0 = tmp;
12088 ix86_compare_op1 = const0_rtx;
12089 ix86_expand_branch (code, label);
12090 return;
12091 }
12092
12093 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12094 op1 is a constant and the low word is zero, then we can just
12095 examine the high word. */
12096
12097 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
12098 switch (code)
12099 {
12100 case LT: case LTU: case GE: case GEU:
12101 ix86_compare_op0 = hi[0];
12102 ix86_compare_op1 = hi[1];
12103 ix86_expand_branch (code, label);
12104 return;
12105 default:
12106 break;
12107 }
12108
12109 /* Otherwise, we need two or three jumps. */
12110
12111 label2 = gen_label_rtx ();
12112
12113 code1 = code;
12114 code2 = swap_condition (code);
12115 code3 = unsigned_condition (code);
12116
12117 switch (code)
12118 {
12119 case LT: case GT: case LTU: case GTU:
12120 break;
12121
12122 case LE: code1 = LT; code2 = GT; break;
12123 case GE: code1 = GT; code2 = LT; break;
12124 case LEU: code1 = LTU; code2 = GTU; break;
12125 case GEU: code1 = GTU; code2 = LTU; break;
12126
12127 case EQ: code1 = UNKNOWN; code2 = NE; break;
12128 case NE: code2 = UNKNOWN; break;
12129
12130 default:
12131 gcc_unreachable ();
12132 }
12133
12134 /*
12135 * a < b =>
12136 * if (hi(a) < hi(b)) goto true;
12137 * if (hi(a) > hi(b)) goto false;
12138 * if (lo(a) < lo(b)) goto true;
12139 * false:
12140 */
12141
12142 ix86_compare_op0 = hi[0];
12143 ix86_compare_op1 = hi[1];
12144
12145 if (code1 != UNKNOWN)
12146 ix86_expand_branch (code1, label);
12147 if (code2 != UNKNOWN)
12148 ix86_expand_branch (code2, label2);
12149
12150 ix86_compare_op0 = lo[0];
12151 ix86_compare_op1 = lo[1];
12152 ix86_expand_branch (code3, label);
12153
12154 if (code2 != UNKNOWN)
12155 emit_label (label2);
12156 return;
12157 }
12158
12159 default:
12160 gcc_unreachable ();
12161 }
12162 }
12163
12164 /* Split branch based on floating point condition. */
12165 void
12166 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12167 rtx target1, rtx target2, rtx tmp, rtx pushed)
12168 {
12169 rtx second, bypass;
12170 rtx label = NULL_RTX;
12171 rtx condition;
12172 int bypass_probability = -1, second_probability = -1, probability = -1;
12173 rtx i;
12174
12175 if (target2 != pc_rtx)
12176 {
12177 rtx tmp = target2;
12178 code = reverse_condition_maybe_unordered (code);
12179 target2 = target1;
12180 target1 = tmp;
12181 }
12182
12183 condition = ix86_expand_fp_compare (code, op1, op2,
12184 tmp, &second, &bypass);
12185
12186 /* Remove pushed operand from stack. */
12187 if (pushed)
12188 ix86_free_from_memory (GET_MODE (pushed));
12189
12190 if (split_branch_probability >= 0)
12191 {
12192 /* Distribute the probabilities across the jumps.
12193 Assume that BYPASS and SECOND always test
12194 for UNORDERED. */
12195 probability = split_branch_probability;
12196
12197 /* A value of 1 is low enough that the probability does not need
12198 to be updated. Later we may run some experiments and see
12199 if unordered values are more frequent in practice. */
12200 if (bypass)
12201 bypass_probability = 1;
12202 if (second)
12203 second_probability = 1;
12204 }
12205 if (bypass != NULL_RTX)
12206 {
12207 label = gen_label_rtx ();
12208 i = emit_jump_insn (gen_rtx_SET
12209 (VOIDmode, pc_rtx,
12210 gen_rtx_IF_THEN_ELSE (VOIDmode,
12211 bypass,
12212 gen_rtx_LABEL_REF (VOIDmode,
12213 label),
12214 pc_rtx)));
12215 if (bypass_probability >= 0)
12216 REG_NOTES (i)
12217 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12218 GEN_INT (bypass_probability),
12219 REG_NOTES (i));
12220 }
12221 i = emit_jump_insn (gen_rtx_SET
12222 (VOIDmode, pc_rtx,
12223 gen_rtx_IF_THEN_ELSE (VOIDmode,
12224 condition, target1, target2)));
12225 if (probability >= 0)
12226 REG_NOTES (i)
12227 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12228 GEN_INT (probability),
12229 REG_NOTES (i));
12230 if (second != NULL_RTX)
12231 {
12232 i = emit_jump_insn (gen_rtx_SET
12233 (VOIDmode, pc_rtx,
12234 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12235 target2)));
12236 if (second_probability >= 0)
12237 REG_NOTES (i)
12238 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12239 GEN_INT (second_probability),
12240 REG_NOTES (i));
12241 }
12242 if (label != NULL_RTX)
12243 emit_label (label);
12244 }
12245
12246 int
12247 ix86_expand_setcc (enum rtx_code code, rtx dest)
12248 {
12249 rtx ret, tmp, tmpreg, equiv;
12250 rtx second_test, bypass_test;
12251
12252 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12253 return 0; /* FAIL */
12254
12255 gcc_assert (GET_MODE (dest) == QImode);
12256
12257 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12258 PUT_MODE (ret, QImode);
12259
12260 tmp = dest;
12261 tmpreg = dest;
12262
12263 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12264 if (bypass_test || second_test)
12265 {
12266 rtx test = second_test;
12267 int bypass = 0;
12268 rtx tmp2 = gen_reg_rtx (QImode);
12269 if (bypass_test)
12270 {
12271 gcc_assert (!second_test);
12272 test = bypass_test;
12273 bypass = 1;
12274 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12275 }
12276 PUT_MODE (test, QImode);
12277 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12278
12279 if (bypass)
12280 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12281 else
12282 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12283 }
12284
12285 /* Attach a REG_EQUAL note describing the comparison result. */
12286 if (ix86_compare_op0 && ix86_compare_op1)
12287 {
12288 equiv = simplify_gen_relational (code, QImode,
12289 GET_MODE (ix86_compare_op0),
12290 ix86_compare_op0, ix86_compare_op1);
12291 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12292 }
12293
12294 return 1; /* DONE */
12295 }
12296
12297 /* Expand a comparison setting or clearing the carry flag. Return true
12298 when successful and set *POP to the comparison for the operation. */
12299 static bool
12300 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12301 {
12302 enum machine_mode mode =
12303 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12304
12305 /* Do not handle double-word (DImode/TImode) compares that go through the special path. */
12306 if (mode == (TARGET_64BIT ? TImode : DImode))
12307 return false;
12308
12309 if (SCALAR_FLOAT_MODE_P (mode))
12310 {
12311 rtx second_test = NULL, bypass_test = NULL;
12312 rtx compare_op, compare_seq;
12313
12314 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12315
12316 /* Shortcut: the following common codes never translate
12317 into carry flag compares. */
12318 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12319 || code == ORDERED || code == UNORDERED)
12320 return false;
12321
12322 /* These comparisons require the zero flag; swap the operands so they won't. */
12323 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12324 && !TARGET_IEEE_FP)
12325 {
12326 rtx tmp = op0;
12327 op0 = op1;
12328 op1 = tmp;
12329 code = swap_condition (code);
12330 }
12331
12332 /* Try to expand the comparison and verify that we end up with a
12333 carry flag based comparison. This fails to be true only when we
12334 decide to expand the comparison using arithmetic, which is not a
12335 very common scenario. */
12336 start_sequence ();
12337 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12338 &second_test, &bypass_test);
12339 compare_seq = get_insns ();
12340 end_sequence ();
12341
12342 if (second_test || bypass_test)
12343 return false;
12344
12345 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12346 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12347 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12348 else
12349 code = GET_CODE (compare_op);
12350
12351 if (code != LTU && code != GEU)
12352 return false;
12353
12354 emit_insn (compare_seq);
12355 *pop = compare_op;
12356 return true;
12357 }
12358
12359 if (!INTEGRAL_MODE_P (mode))
12360 return false;
12361
12362 switch (code)
12363 {
12364 case LTU:
12365 case GEU:
12366 break;
12367
12368 /* Convert a==0 into (unsigned)a<1. */
12369 case EQ:
12370 case NE:
12371 if (op1 != const0_rtx)
12372 return false;
12373 op1 = const1_rtx;
12374 code = (code == EQ ? LTU : GEU);
12375 break;
12376
12377 /* Convert a>b into b<a or a>=b+1. */
12378 case GTU:
12379 case LEU:
12380 if (CONST_INT_P (op1))
12381 {
12382 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12383 /* Bail out on overflow. We could still swap the operands, but that
12384 would force loading of the constant into a register. */
12385 if (op1 == const0_rtx
12386 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12387 return false;
12388 code = (code == GTU ? GEU : LTU);
12389 }
12390 else
12391 {
12392 rtx tmp = op1;
12393 op1 = op0;
12394 op0 = tmp;
12395 code = (code == GTU ? LTU : GEU);
12396 }
12397 break;
12398
12399 /* Convert a>=0 into (unsigned)a<0x80000000. */
12400 case LT:
12401 case GE:
12402 if (mode == DImode || op1 != const0_rtx)
12403 return false;
12404 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12405 code = (code == LT ? GEU : LTU);
12406 break;
12407 case LE:
12408 case GT:
12409 if (mode == DImode || op1 != constm1_rtx)
12410 return false;
12411 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12412 code = (code == LE ? GEU : LTU);
12413 break;
12414
12415 default:
12416 return false;
12417 }
12418 /* Swapping operands may cause the constant to appear as the first operand. */
12419 if (!nonimmediate_operand (op0, VOIDmode))
12420 {
12421 if (!can_create_pseudo_p ())
12422 return false;
12423 op0 = force_reg (mode, op0);
12424 }
12425 ix86_compare_op0 = op0;
12426 ix86_compare_op1 = op1;
12427 *pop = ix86_expand_compare (code, NULL, NULL);
12428 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12429 return true;
12430 }
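
/* Editor's illustrative example of the rewrites above: the unsigned test
   "x > 41" becomes "x >= 42" (a pure GEU / carry-flag compare) and
   "x == 0" becomes "(unsigned) x < 1" (LTU); either form can then feed
   the branch-free sbb sequences in ix86_expand_int_movcc below.  */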
12431
12432 int
12433 ix86_expand_int_movcc (rtx operands[])
12434 {
12435 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12436 rtx compare_seq, compare_op;
12437 rtx second_test, bypass_test;
12438 enum machine_mode mode = GET_MODE (operands[0]);
12439 bool sign_bit_compare_p = false;
12440
12441 start_sequence ();
12442 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12443 compare_seq = get_insns ();
12444 end_sequence ();
12445
12446 compare_code = GET_CODE (compare_op);
12447
12448 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12449 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12450 sign_bit_compare_p = true;
12451
12452 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12453 HImode insns, we'd be swallowed in word prefix ops. */
12454
12455 if ((mode != HImode || TARGET_FAST_PREFIX)
12456 && (mode != (TARGET_64BIT ? TImode : DImode))
12457 && CONST_INT_P (operands[2])
12458 && CONST_INT_P (operands[3]))
12459 {
12460 rtx out = operands[0];
12461 HOST_WIDE_INT ct = INTVAL (operands[2]);
12462 HOST_WIDE_INT cf = INTVAL (operands[3]);
12463 HOST_WIDE_INT diff;
12464
12465 diff = ct - cf;
12466 /* Sign bit compares are better done using shifts than by using
12467 sbb. */
12468 if (sign_bit_compare_p
12469 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12470 ix86_compare_op1, &compare_op))
12471 {
12472 /* Detect overlap between destination and compare sources. */
12473 rtx tmp = out;
12474
12475 if (!sign_bit_compare_p)
12476 {
12477 bool fpcmp = false;
12478
12479 compare_code = GET_CODE (compare_op);
12480
12481 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12482 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12483 {
12484 fpcmp = true;
12485 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12486 }
12487
12488 /* To simplify the rest of the code, restrict to the GEU case. */
12489 if (compare_code == LTU)
12490 {
12491 HOST_WIDE_INT tmp = ct;
12492 ct = cf;
12493 cf = tmp;
12494 compare_code = reverse_condition (compare_code);
12495 code = reverse_condition (code);
12496 }
12497 else
12498 {
12499 if (fpcmp)
12500 PUT_CODE (compare_op,
12501 reverse_condition_maybe_unordered
12502 (GET_CODE (compare_op)));
12503 else
12504 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12505 }
12506 diff = ct - cf;
12507
12508 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12509 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12510 tmp = gen_reg_rtx (mode);
12511
12512 if (mode == DImode)
12513 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12514 else
12515 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12516 }
12517 else
12518 {
12519 if (code == GT || code == GE)
12520 code = reverse_condition (code);
12521 else
12522 {
12523 HOST_WIDE_INT tmp = ct;
12524 ct = cf;
12525 cf = tmp;
12526 diff = ct - cf;
12527 }
12528 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12529 ix86_compare_op1, VOIDmode, 0, -1);
12530 }
12531
12532 if (diff == 1)
12533 {
12534 /*
12535 * cmpl op0,op1
12536 * sbbl dest,dest
12537 * [addl dest, ct]
12538 *
12539 * Size 5 - 8.
12540 */
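/* Editor's illustrative example (a sketch, not necessarily the exact
   output): "a < b ? 3 : 4" for unsigned a, b reaches this diff == 1
   case with ct = 4, cf = 3 after the GEU canonicalization:

     cmpl op0,op1       -- CF set exactly when a < b
     sbbl dest,dest     -- dest = a < b ? -1 : 0
     addl $4, dest      -- dest = a < b ? 3 : 4

   with no branch at all.  */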
12541 if (ct)
12542 tmp = expand_simple_binop (mode, PLUS,
12543 tmp, GEN_INT (ct),
12544 copy_rtx (tmp), 1, OPTAB_DIRECT);
12545 }
12546 else if (cf == -1)
12547 {
12548 /*
12549 * cmpl op0,op1
12550 * sbbl dest,dest
12551 * orl $ct, dest
12552 *
12553 * Size 8.
12554 */
12555 tmp = expand_simple_binop (mode, IOR,
12556 tmp, GEN_INT (ct),
12557 copy_rtx (tmp), 1, OPTAB_DIRECT);
12558 }
12559 else if (diff == -1 && ct)
12560 {
12561 /*
12562 * cmpl op0,op1
12563 * sbbl dest,dest
12564 * notl dest
12565 * [addl dest, cf]
12566 *
12567 * Size 8 - 11.
12568 */
12569 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12570 if (cf)
12571 tmp = expand_simple_binop (mode, PLUS,
12572 copy_rtx (tmp), GEN_INT (cf),
12573 copy_rtx (tmp), 1, OPTAB_DIRECT);
12574 }
12575 else
12576 {
12577 /*
12578 * cmpl op0,op1
12579 * sbbl dest,dest
12580 * [notl dest]
12581 * andl cf - ct, dest
12582 * [addl dest, ct]
12583 *
12584 * Size 8 - 11.
12585 */
12586
12587 if (cf == 0)
12588 {
12589 cf = ct;
12590 ct = 0;
12591 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12592 }
12593
12594 tmp = expand_simple_binop (mode, AND,
12595 copy_rtx (tmp),
12596 gen_int_mode (cf - ct, mode),
12597 copy_rtx (tmp), 1, OPTAB_DIRECT);
12598 if (ct)
12599 tmp = expand_simple_binop (mode, PLUS,
12600 copy_rtx (tmp), GEN_INT (ct),
12601 copy_rtx (tmp), 1, OPTAB_DIRECT);
12602 }
12603
12604 if (!rtx_equal_p (tmp, out))
12605 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12606
12607 return 1; /* DONE */
12608 }
12609
12610 if (diff < 0)
12611 {
12612 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12613
12614 HOST_WIDE_INT tmp;
12615 tmp = ct, ct = cf, cf = tmp;
12616 diff = -diff;
12617
12618 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12619 {
12620 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12621
12622 /* We may be reversing unordered compare to normal compare, that
12623 is not valid in general (we may convert non-trapping condition
12624 to trapping one), however on i386 we currently emit all
12625 comparisons unordered. */
12626 compare_code = reverse_condition_maybe_unordered (compare_code);
12627 code = reverse_condition_maybe_unordered (code);
12628 }
12629 else
12630 {
12631 compare_code = reverse_condition (compare_code);
12632 code = reverse_condition (code);
12633 }
12634 }
12635
12636 compare_code = UNKNOWN;
12637 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12638 && CONST_INT_P (ix86_compare_op1))
12639 {
12640 if (ix86_compare_op1 == const0_rtx
12641 && (code == LT || code == GE))
12642 compare_code = code;
12643 else if (ix86_compare_op1 == constm1_rtx)
12644 {
12645 if (code == LE)
12646 compare_code = LT;
12647 else if (code == GT)
12648 compare_code = GE;
12649 }
12650 }
12651
12652 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12653 if (compare_code != UNKNOWN
12654 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12655 && (cf == -1 || ct == -1))
12656 {
12657 /* If the lea code below could be used, only optimize
12658 if it results in a 2 insn sequence. */
12659
12660 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12661 || diff == 3 || diff == 5 || diff == 9)
12662 || (compare_code == LT && ct == -1)
12663 || (compare_code == GE && cf == -1))
12664 {
12665 /*
12666 * notl op1 (if necessary)
12667 * sarl $31, op1
12668 * orl cf, op1
12669 */
12670 if (ct != -1)
12671 {
12672 cf = ct;
12673 ct = -1;
12674 code = reverse_condition (code);
12675 }
12676
12677 out = emit_store_flag (out, code, ix86_compare_op0,
12678 ix86_compare_op1, VOIDmode, 0, -1);
12679
12680 out = expand_simple_binop (mode, IOR,
12681 out, GEN_INT (cf),
12682 out, 1, OPTAB_DIRECT);
12683 if (out != operands[0])
12684 emit_move_insn (operands[0], out);
12685
12686 return 1; /* DONE */
12687 }
12688 }
12689
12690
12691 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12692 || diff == 3 || diff == 5 || diff == 9)
12693 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12694 && (mode != DImode
12695 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12696 {
12697 /*
12698 * xorl dest,dest
12699 * cmpl op1,op2
12700 * setcc dest
12701 * lea cf(dest*(ct-cf)),dest
12702 *
12703 * Size 14.
12704 *
12705 * This also catches the degenerate setcc-only case.
12706 */
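/* Editor's illustrative example: a signed "a < b ? 7 : 3" (ct = 7, cf = 3,
   diff = 4) becomes roughly

     xorl dest,dest / cmpl op1,op2 / setcc dest / leal 3(,dest,4),dest

   i.e. the 0/1 setcc result is scaled by (ct - cf) and offset by cf in a
   single lea, with no branch.  */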
12707
12708 rtx tmp;
12709 int nops;
12710
12711 out = emit_store_flag (out, code, ix86_compare_op0,
12712 ix86_compare_op1, VOIDmode, 0, 1);
12713
12714 nops = 0;
12715 /* On x86_64 the lea instruction operates on Pmode, so we need
12716 to get the arithmetic done in the proper mode to match. */
12717 if (diff == 1)
12718 tmp = copy_rtx (out);
12719 else
12720 {
12721 rtx out1;
12722 out1 = copy_rtx (out);
12723 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12724 nops++;
12725 if (diff & 1)
12726 {
12727 tmp = gen_rtx_PLUS (mode, tmp, out1);
12728 nops++;
12729 }
12730 }
12731 if (cf != 0)
12732 {
12733 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12734 nops++;
12735 }
12736 if (!rtx_equal_p (tmp, out))
12737 {
12738 if (nops == 1)
12739 out = force_operand (tmp, copy_rtx (out));
12740 else
12741 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12742 }
12743 if (!rtx_equal_p (out, operands[0]))
12744 emit_move_insn (operands[0], copy_rtx (out));
12745
12746 return 1; /* DONE */
12747 }
12748
12749 /*
12750 * General case: Jumpful:
12751 * xorl dest,dest cmpl op1, op2
12752 * cmpl op1, op2 movl ct, dest
12753 * setcc dest jcc 1f
12754 * decl dest movl cf, dest
12755 * andl (cf-ct),dest 1:
12756 * addl ct,dest
12757 *
12758 * Size 20. Size 14.
12759 *
12760 * This is reasonably steep, but branch mispredict costs are
12761 * high on modern cpus, so consider failing only if optimizing
12762 * for space.
12763 */
12764
12765 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12766 && BRANCH_COST >= 2)
12767 {
12768 if (cf == 0)
12769 {
12770 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12771
12772 cf = ct;
12773 ct = 0;
12774
12775 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12776 {
12777 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12778
12779 /* We may be reversing unordered compare to normal compare,
12780 that is not valid in general (we may convert non-trapping
12781 condition to trapping one), however on i386 we currently
12782 emit all comparisons unordered. */
12783 code = reverse_condition_maybe_unordered (code);
12784 }
12785 else
12786 {
12787 code = reverse_condition (code);
12788 if (compare_code != UNKNOWN)
12789 compare_code = reverse_condition (compare_code);
12790 }
12791 }
12792
12793 if (compare_code != UNKNOWN)
12794 {
12795 /* notl op1 (if needed)
12796 sarl $31, op1
12797 andl (cf-ct), op1
12798 addl ct, op1
12799
12800 For x < 0 (resp. x <= -1) there will be no notl,
12801 so if possible swap the constants to get rid of the
12802 complement.
12803 True/false will be -1/0 while code below (store flag
12804 followed by decrement) is 0/-1, so the constants need
12805 to be exchanged once more. */
12806
12807 if (compare_code == GE || !cf)
12808 {
12809 code = reverse_condition (code);
12810 compare_code = LT;
12811 }
12812 else
12813 {
12814 HOST_WIDE_INT tmp = cf;
12815 cf = ct;
12816 ct = tmp;
12817 }
12818
12819 out = emit_store_flag (out, code, ix86_compare_op0,
12820 ix86_compare_op1, VOIDmode, 0, -1);
12821 }
12822 else
12823 {
12824 out = emit_store_flag (out, code, ix86_compare_op0,
12825 ix86_compare_op1, VOIDmode, 0, 1);
12826
12827 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12828 copy_rtx (out), 1, OPTAB_DIRECT);
12829 }
12830
12831 out = expand_simple_binop (mode, AND, copy_rtx (out),
12832 gen_int_mode (cf - ct, mode),
12833 copy_rtx (out), 1, OPTAB_DIRECT);
12834 if (ct)
12835 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12836 copy_rtx (out), 1, OPTAB_DIRECT);
12837 if (!rtx_equal_p (out, operands[0]))
12838 emit_move_insn (operands[0], copy_rtx (out));
12839
12840 return 1; /* DONE */
12841 }
12842 }
12843
12844 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12845 {
12846 /* Try a few things more with specific constants and a variable. */
12847
12848 optab op;
12849 rtx var, orig_out, out, tmp;
12850
12851 if (BRANCH_COST <= 2)
12852 return 0; /* FAIL */
12853
12854 /* If one of the two operands is an interesting constant, load a
12855 constant with the above and mask it in with a logical operation. */
12856
12857 if (CONST_INT_P (operands[2]))
12858 {
12859 var = operands[3];
12860 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12861 operands[3] = constm1_rtx, op = and_optab;
12862 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12863 operands[3] = const0_rtx, op = ior_optab;
12864 else
12865 return 0; /* FAIL */
12866 }
12867 else if (CONST_INT_P (operands[3]))
12868 {
12869 var = operands[2];
12870 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12871 operands[2] = constm1_rtx, op = and_optab;
12872 	    else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12873 operands[2] = const0_rtx, op = ior_optab;
12874 else
12875 return 0; /* FAIL */
12876 }
12877 else
12878 return 0; /* FAIL */
12879
12880 orig_out = operands[0];
12881 tmp = gen_reg_rtx (mode);
12882 operands[0] = tmp;
12883
12884 /* Recurse to get the constant loaded. */
12885 if (ix86_expand_int_movcc (operands) == 0)
12886 return 0; /* FAIL */
12887
12888 /* Mask in the interesting variable. */
12889 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12890 OPTAB_WIDEN);
12891 if (!rtx_equal_p (out, orig_out))
12892 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12893
12894 return 1; /* DONE */
12895 }
12896
12897 /*
12898 * For comparison with above,
12899 *
12900 * movl cf,dest
12901 * movl ct,tmp
12902 * cmpl op1,op2
12903 * cmovcc tmp,dest
12904 *
12905 * Size 15.
12906 */
12907
12908 if (! nonimmediate_operand (operands[2], mode))
12909 operands[2] = force_reg (mode, operands[2]);
12910 if (! nonimmediate_operand (operands[3], mode))
12911 operands[3] = force_reg (mode, operands[3]);
12912
12913 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12914 {
12915 rtx tmp = gen_reg_rtx (mode);
12916 emit_move_insn (tmp, operands[3]);
12917 operands[3] = tmp;
12918 }
12919 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12920 {
12921 rtx tmp = gen_reg_rtx (mode);
12922 emit_move_insn (tmp, operands[2]);
12923 operands[2] = tmp;
12924 }
12925
12926 if (! register_operand (operands[2], VOIDmode)
12927 && (mode == QImode
12928 || ! register_operand (operands[3], VOIDmode)))
12929 operands[2] = force_reg (mode, operands[2]);
12930
12931 if (mode == QImode
12932 && ! register_operand (operands[3], VOIDmode))
12933 operands[3] = force_reg (mode, operands[3]);
12934
12935 emit_insn (compare_seq);
12936 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12937 gen_rtx_IF_THEN_ELSE (mode,
12938 compare_op, operands[2],
12939 operands[3])));
12940 if (bypass_test)
12941 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12942 gen_rtx_IF_THEN_ELSE (mode,
12943 bypass_test,
12944 copy_rtx (operands[3]),
12945 copy_rtx (operands[0]))));
12946 if (second_test)
12947 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12948 gen_rtx_IF_THEN_ELSE (mode,
12949 second_test,
12950 copy_rtx (operands[2]),
12951 copy_rtx (operands[0]))));
12952
12953 return 1; /* DONE */
12954 }
12955
12956 /* Swap, force into registers, or otherwise massage the two operands
12957 to an sse comparison with a mask result. Thus we differ a bit from
12958 ix86_prepare_fp_compare_args which expects to produce a flags result.
12959
12960 The DEST operand exists to help determine whether to commute commutative
12961 operators. The POP0/POP1 operands are updated in place. The new
12962 comparison code is returned, or UNKNOWN if not implementable. */
12963
12964 static enum rtx_code
12965 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12966 rtx *pop0, rtx *pop1)
12967 {
12968 rtx tmp;
12969
12970 switch (code)
12971 {
12972 case LTGT:
12973 case UNEQ:
12974 /* We have no LTGT as an operator. We could implement it with
12975 NE & ORDERED, but this requires an extra temporary. It's
12976 not clear that it's worth it. */
12977 return UNKNOWN;
12978
12979 case LT:
12980 case LE:
12981 case UNGT:
12982 case UNGE:
12983 /* These are supported directly. */
12984 break;
12985
12986 case EQ:
12987 case NE:
12988 case UNORDERED:
12989 case ORDERED:
12990 /* For commutative operators, try to canonicalize the destination
12991 operand to be first in the comparison - this helps reload to
12992 avoid extra moves. */
12993 if (!dest || !rtx_equal_p (dest, *pop1))
12994 break;
12995 /* FALLTHRU */
12996
12997 case GE:
12998 case GT:
12999 case UNLE:
13000 case UNLT:
13001 /* These are not supported directly. Swap the comparison operands
13002 to transform into something that is supported. */
13003 tmp = *pop0;
13004 *pop0 = *pop1;
13005 *pop1 = tmp;
13006 code = swap_condition (code);
13007 break;
13008
13009 default:
13010 gcc_unreachable ();
13011 }
13012
13013 return code;
13014 }
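
/* Illustrative note: a comparison such as a > b has no direct SSE compare
   predicate, so the helper above swaps *POP0/*POP1 and returns LT, turning
   it into b < a, which a plain cmpltps/cmpltsd can implement; likewise
   a >= b becomes b <= a.  */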
13015
13016 /* Detect conditional moves that exactly match min/max operational
13017 semantics. Note that this is IEEE safe, as long as we don't
13018 interchange the operands.
13019
13020 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13021 and TRUE if the operation is successful and instructions are emitted. */
13022
13023 static bool
13024 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13025 rtx cmp_op1, rtx if_true, rtx if_false)
13026 {
13027 enum machine_mode mode;
13028 bool is_min;
13029 rtx tmp;
13030
13031 if (code == LT)
13032 ;
13033 else if (code == UNGE)
13034 {
13035 tmp = if_true;
13036 if_true = if_false;
13037 if_false = tmp;
13038 }
13039 else
13040 return false;
13041
13042 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13043 is_min = true;
13044 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13045 is_min = false;
13046 else
13047 return false;
13048
13049 mode = GET_MODE (dest);
13050
13051 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13052 but MODE may be a vector mode and thus not appropriate. */
13053 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13054 {
13055 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13056 rtvec v;
13057
13058 if_true = force_reg (mode, if_true);
13059 v = gen_rtvec (2, if_true, if_false);
13060 tmp = gen_rtx_UNSPEC (mode, v, u);
13061 }
13062 else
13063 {
13064 code = is_min ? SMIN : SMAX;
13065 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13066 }
13067
13068 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13069 return true;
13070 }
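
/* Illustrative sketch: the pattern
     dest = (a < b) ? a : b
   is the is_min case above and maps to a single minss/minps, while
     dest = (a < b) ? b : a
   is the is_max case and maps to maxss/maxps.  Keeping the operand order
   unchanged is what preserves the IEEE semantics noted in the comment
   above.  */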
13071
13072 /* Expand an sse vector comparison. Return the register with the result. */
13073
13074 static rtx
13075 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13076 rtx op_true, rtx op_false)
13077 {
13078 enum machine_mode mode = GET_MODE (dest);
13079 rtx x;
13080
13081 cmp_op0 = force_reg (mode, cmp_op0);
13082 if (!nonimmediate_operand (cmp_op1, mode))
13083 cmp_op1 = force_reg (mode, cmp_op1);
13084
13085 if (optimize
13086 || reg_overlap_mentioned_p (dest, op_true)
13087 || reg_overlap_mentioned_p (dest, op_false))
13088 dest = gen_reg_rtx (mode);
13089
13090 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13091 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13092
13093 return dest;
13094 }
13095
13096 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13097 operations. This is used for both scalar and vector conditional moves. */
13098
13099 static void
13100 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13101 {
13102 enum machine_mode mode = GET_MODE (dest);
13103 rtx t2, t3, x;
13104
13105 if (TARGET_SSE5)
13106 {
13107 rtx pcmov = gen_rtx_SET (mode, dest,
13108 gen_rtx_IF_THEN_ELSE (mode, cmp,
13109 op_true,
13110 op_false));
13111 emit_insn (pcmov);
13112 }
13113 else if (op_false == CONST0_RTX (mode))
13114 {
13115 op_true = force_reg (mode, op_true);
13116 x = gen_rtx_AND (mode, cmp, op_true);
13117 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13118 }
13119 else if (op_true == CONST0_RTX (mode))
13120 {
13121 op_false = force_reg (mode, op_false);
13122 x = gen_rtx_NOT (mode, cmp);
13123 x = gen_rtx_AND (mode, x, op_false);
13124 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13125 }
13126 else
13127 {
13128 op_true = force_reg (mode, op_true);
13129 op_false = force_reg (mode, op_false);
13130
13131 t2 = gen_reg_rtx (mode);
13132 if (optimize)
13133 t3 = gen_reg_rtx (mode);
13134 else
13135 t3 = dest;
13136
13137 x = gen_rtx_AND (mode, op_true, cmp);
13138 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13139
13140 x = gen_rtx_NOT (mode, cmp);
13141 x = gen_rtx_AND (mode, x, op_false);
13142 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13143
13144 x = gen_rtx_IOR (mode, t3, t2);
13145 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13146 }
13147 }
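
/* For illustration: with a mask CMP whose elements are all-ones or
   all-zeros, the general case above computes
     dest = (cmp & op_true) | (~cmp & op_false)
   i.e. an and/andnot/or sequence such as andps/andnps/orps; the cases where
   op_true or op_false is zero degenerate to a single and or andnot.  */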
13148
13149 /* Expand a floating-point conditional move. Return true if successful. */
13150
13151 int
13152 ix86_expand_fp_movcc (rtx operands[])
13153 {
13154 enum machine_mode mode = GET_MODE (operands[0]);
13155 enum rtx_code code = GET_CODE (operands[1]);
13156 rtx tmp, compare_op, second_test, bypass_test;
13157
13158 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13159 {
13160 enum machine_mode cmode;
13161
13162 	  /* Since we have no cmove for sse registers, don't force bad register
13163 allocation just to gain access to it. Deny movcc when the
13164 comparison mode doesn't match the move mode. */
13165 cmode = GET_MODE (ix86_compare_op0);
13166 if (cmode == VOIDmode)
13167 cmode = GET_MODE (ix86_compare_op1);
13168 if (cmode != mode)
13169 return 0;
13170
13171 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13172 &ix86_compare_op0,
13173 &ix86_compare_op1);
13174 if (code == UNKNOWN)
13175 return 0;
13176
13177 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13178 ix86_compare_op1, operands[2],
13179 operands[3]))
13180 return 1;
13181
13182 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13183 ix86_compare_op1, operands[2], operands[3]);
13184 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13185 return 1;
13186 }
13187
13188 /* The floating point conditional move instructions don't directly
13189 support conditions resulting from a signed integer comparison. */
13190
13191 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13192
13196 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13197 {
13198 gcc_assert (!second_test && !bypass_test);
13199 tmp = gen_reg_rtx (QImode);
13200 ix86_expand_setcc (code, tmp);
13201 code = NE;
13202 ix86_compare_op0 = tmp;
13203 ix86_compare_op1 = const0_rtx;
13204 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13205 }
13206 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13207 {
13208 tmp = gen_reg_rtx (mode);
13209 emit_move_insn (tmp, operands[3]);
13210 operands[3] = tmp;
13211 }
13212 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13213 {
13214 tmp = gen_reg_rtx (mode);
13215 emit_move_insn (tmp, operands[2]);
13216 operands[2] = tmp;
13217 }
13218
13219 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13220 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13221 operands[2], operands[3])));
13222 if (bypass_test)
13223 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13224 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13225 operands[3], operands[0])));
13226 if (second_test)
13227 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13228 gen_rtx_IF_THEN_ELSE (mode, second_test,
13229 operands[2], operands[0])));
13230
13231 return 1;
13232 }
13233
13234 /* Expand a floating-point vector conditional move; a vcond operation
13235 rather than a movcc operation. */
13236
13237 bool
13238 ix86_expand_fp_vcond (rtx operands[])
13239 {
13240 enum rtx_code code = GET_CODE (operands[3]);
13241 rtx cmp;
13242
13243 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13244 &operands[4], &operands[5]);
13245 if (code == UNKNOWN)
13246 return false;
13247
13248 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13249 operands[5], operands[1], operands[2]))
13250 return true;
13251
13252 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13253 operands[1], operands[2]);
13254 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13255 return true;
13256 }
13257
13258 /* Expand a signed/unsigned integral vector conditional move. */
13259
13260 bool
13261 ix86_expand_int_vcond (rtx operands[])
13262 {
13263 enum machine_mode mode = GET_MODE (operands[0]);
13264 enum rtx_code code = GET_CODE (operands[3]);
13265 bool negate = false;
13266 rtx x, cop0, cop1;
13267
13268 cop0 = operands[4];
13269 cop1 = operands[5];
13270
13271 /* Canonicalize the comparison to EQ, GT, GTU. */
13272 switch (code)
13273 {
13274 case EQ:
13275 case GT:
13276 case GTU:
13277 break;
13278
13279 case NE:
13280 case LE:
13281 case LEU:
13282 code = reverse_condition (code);
13283 negate = true;
13284 break;
13285
13286 case GE:
13287 case GEU:
13288 code = reverse_condition (code);
13289 negate = true;
13290 /* FALLTHRU */
13291
13292 case LT:
13293 case LTU:
13294 code = swap_condition (code);
13295 x = cop0, cop0 = cop1, cop1 = x;
13296 break;
13297
13298 default:
13299 gcc_unreachable ();
13300 }
13301
13302 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13303 if (mode == V2DImode)
13304 {
13305 switch (code)
13306 {
13307 case EQ:
13308 /* SSE4.1 supports EQ. */
13309 if (!TARGET_SSE4_1)
13310 return false;
13311 break;
13312
13313 case GT:
13314 case GTU:
13315 /* SSE4.2 supports GT/GTU. */
13316 if (!TARGET_SSE4_2)
13317 return false;
13318 break;
13319
13320 default:
13321 gcc_unreachable ();
13322 }
13323 }
13324
13325 /* Unsigned parallel compare is not supported by the hardware. Play some
13326 tricks to turn this into a signed comparison against 0. */
13327 if (code == GTU)
13328 {
13329 cop0 = force_reg (mode, cop0);
13330
13331 switch (mode)
13332 {
13333 case V4SImode:
13334 case V2DImode:
13335 {
13336 rtx t1, t2, mask;
13337
13338 /* Perform a parallel modulo subtraction. */
13339 t1 = gen_reg_rtx (mode);
13340 emit_insn ((mode == V4SImode
13341 ? gen_subv4si3
13342 : gen_subv2di3) (t1, cop0, cop1));
13343
13344 /* Extract the original sign bit of op0. */
13345 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13346 true, false);
13347 t2 = gen_reg_rtx (mode);
13348 emit_insn ((mode == V4SImode
13349 ? gen_andv4si3
13350 : gen_andv2di3) (t2, cop0, mask));
13351
13352 /* XOR it back into the result of the subtraction. This results
13353 in the sign bit set iff we saw unsigned underflow. */
13354 x = gen_reg_rtx (mode);
13355 emit_insn ((mode == V4SImode
13356 ? gen_xorv4si3
13357 : gen_xorv2di3) (x, t1, t2));
13358
13359 code = GT;
13360 }
13361 break;
13362
13363 case V16QImode:
13364 case V8HImode:
13365 /* Perform a parallel unsigned saturating subtraction. */
13366 x = gen_reg_rtx (mode);
13367 emit_insn (gen_rtx_SET (VOIDmode, x,
13368 gen_rtx_US_MINUS (mode, cop0, cop1)));
13369
13370 code = EQ;
13371 negate = !negate;
13372 break;
13373
13374 default:
13375 gcc_unreachable ();
13376 }
13377
13378 cop0 = x;
13379 cop1 = CONST0_RTX (mode);
13380 }
13381
13382 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13383 operands[1+negate], operands[2-negate]);
13384
13385 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13386 operands[2-negate]);
13387 return true;
13388 }
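
/* Worked example (illustrative): for a V16QImode a >u b the code above
   emits an unsigned saturating subtract (psubusb) and then tests the result
   against zero, using  a >u b  <=>  (a -us b) != 0.  Since only EQ is
   available it actually compares for == 0 (a <=u b) and compensates by
   flipping NEGATE, which swaps the two value operands of the vcond.  */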
13389
13390 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13391 true if we should do zero extension, else sign extension. HIGH_P is
13392 true if we want the N/2 high elements, else the low elements. */
13393
13394 void
13395 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13396 {
13397 enum machine_mode imode = GET_MODE (operands[1]);
13398 rtx (*unpack)(rtx, rtx, rtx);
13399 rtx se, dest;
13400
13401 switch (imode)
13402 {
13403 case V16QImode:
13404 if (high_p)
13405 unpack = gen_vec_interleave_highv16qi;
13406 else
13407 unpack = gen_vec_interleave_lowv16qi;
13408 break;
13409 case V8HImode:
13410 if (high_p)
13411 unpack = gen_vec_interleave_highv8hi;
13412 else
13413 unpack = gen_vec_interleave_lowv8hi;
13414 break;
13415 case V4SImode:
13416 if (high_p)
13417 unpack = gen_vec_interleave_highv4si;
13418 else
13419 unpack = gen_vec_interleave_lowv4si;
13420 break;
13421 default:
13422 gcc_unreachable ();
13423 }
13424
13425 dest = gen_lowpart (imode, operands[0]);
13426
13427 if (unsigned_p)
13428 se = force_reg (imode, CONST0_RTX (imode));
13429 else
13430 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13431 operands[1], pc_rtx, pc_rtx);
13432
13433 emit_insn (unpack (dest, operands[1], se));
13434 }
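
/* Illustrative example: sign-extending the low half of a V16QImode operand
   first builds SE with a compare of zero against the source (pcmpgtb,
   all-ones where a byte is negative) and then interleaves the source with
   SE via punpcklbw, yielding the V8HImode sign extension of the low eight
   bytes.  */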
13435
13436 /* This function performs the same task as ix86_expand_sse_unpack,
13437 but with SSE4.1 instructions. */
13438
13439 void
13440 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13441 {
13442 enum machine_mode imode = GET_MODE (operands[1]);
13443 rtx (*unpack)(rtx, rtx);
13444 rtx src, dest;
13445
13446 switch (imode)
13447 {
13448 case V16QImode:
13449 if (unsigned_p)
13450 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13451 else
13452 unpack = gen_sse4_1_extendv8qiv8hi2;
13453 break;
13454 case V8HImode:
13455 if (unsigned_p)
13456 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13457 else
13458 unpack = gen_sse4_1_extendv4hiv4si2;
13459 break;
13460 case V4SImode:
13461 if (unsigned_p)
13462 unpack = gen_sse4_1_zero_extendv2siv2di2;
13463 else
13464 unpack = gen_sse4_1_extendv2siv2di2;
13465 break;
13466 default:
13467 gcc_unreachable ();
13468 }
13469
13470 dest = operands[0];
13471 if (high_p)
13472 {
13473 /* Shift higher 8 bytes to lower 8 bytes. */
13474 src = gen_reg_rtx (imode);
13475 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13476 gen_lowpart (TImode, operands[1]),
13477 GEN_INT (64)));
13478 }
13479 else
13480 src = operands[1];
13481
13482 emit_insn (unpack (dest, src));
13483 }
13484
13485 /* This function performs the same task as ix86_expand_sse_unpack,
13486    but with SSE5 instructions.  */
13487
13488 #define PPERM_SRC 0x00 /* copy source */
13489 #define PPERM_INVERT 0x20 /* invert source */
13490 #define PPERM_REVERSE 0x40 /* bit reverse source */
13491 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13492 #define PPERM_ZERO 0x80 /* all 0's */
13493 #define PPERM_ONES 0xa0 /* all 1's */
13494 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13495 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13496
13497 #define PPERM_SRC1 0x00 /* use first source byte */
13498 #define PPERM_SRC2 0x10 /* use second source byte */
13499
13500 void
13501 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13502 {
13503 enum machine_mode imode = GET_MODE (operands[1]);
13504 int pperm_bytes[16];
13505 int i;
13506 int h = (high_p) ? 8 : 0;
13507 int h2;
13508 int sign_extend;
13509 rtvec v = rtvec_alloc (16);
13510 rtvec vs;
13511 rtx x, p;
13512 rtx op0 = operands[0], op1 = operands[1];
13513
13514 switch (imode)
13515 {
13516 case V16QImode:
13517 vs = rtvec_alloc (8);
13518 h2 = (high_p) ? 8 : 0;
13519 for (i = 0; i < 8; i++)
13520 {
13521 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13522 pperm_bytes[2*i+1] = ((unsigned_p)
13523 ? PPERM_ZERO
13524 : PPERM_SIGN | PPERM_SRC2 | i | h);
13525 }
13526
13527 for (i = 0; i < 16; i++)
13528 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13529
13530 for (i = 0; i < 8; i++)
13531 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13532
13533 p = gen_rtx_PARALLEL (VOIDmode, vs);
13534 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13535 if (unsigned_p)
13536 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13537 else
13538 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13539 break;
13540
13541 case V8HImode:
13542 vs = rtvec_alloc (4);
13543 h2 = (high_p) ? 4 : 0;
13544 for (i = 0; i < 4; i++)
13545 {
13546 sign_extend = ((unsigned_p)
13547 ? PPERM_ZERO
13548 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13549 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13550 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13551 pperm_bytes[4*i+2] = sign_extend;
13552 pperm_bytes[4*i+3] = sign_extend;
13553 }
13554
13555 for (i = 0; i < 16; i++)
13556 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13557
13558 for (i = 0; i < 4; i++)
13559 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13560
13561 p = gen_rtx_PARALLEL (VOIDmode, vs);
13562 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13563 if (unsigned_p)
13564 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13565 else
13566 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13567 break;
13568
13569 case V4SImode:
13570 vs = rtvec_alloc (2);
13571 h2 = (high_p) ? 2 : 0;
13572 for (i = 0; i < 2; i++)
13573 {
13574 sign_extend = ((unsigned_p)
13575 ? PPERM_ZERO
13576 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13577 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13578 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13579 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13580 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13581 pperm_bytes[8*i+4] = sign_extend;
13582 pperm_bytes[8*i+5] = sign_extend;
13583 pperm_bytes[8*i+6] = sign_extend;
13584 pperm_bytes[8*i+7] = sign_extend;
13585 }
13586
13587 for (i = 0; i < 16; i++)
13588 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13589
13590 for (i = 0; i < 2; i++)
13591 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13592
13593 p = gen_rtx_PARALLEL (VOIDmode, vs);
13594 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13595 if (unsigned_p)
13596 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13597 else
13598 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13599 break;
13600
13601 default:
13602 gcc_unreachable ();
13603 }
13604
13605 return;
13606 }
13607
13608 /* Pack the low halves of the elements of OPERANDS[1] (low half of the result)
13609    and OPERANDS[2] (high half) into the next narrower integer vector type.  */
13610 void
13611 ix86_expand_sse5_pack (rtx operands[3])
13612 {
13613 enum machine_mode imode = GET_MODE (operands[0]);
13614 int pperm_bytes[16];
13615 int i;
13616 rtvec v = rtvec_alloc (16);
13617 rtx x;
13618 rtx op0 = operands[0];
13619 rtx op1 = operands[1];
13620 rtx op2 = operands[2];
13621
13622 switch (imode)
13623 {
13624 case V16QImode:
13625 for (i = 0; i < 8; i++)
13626 {
13627 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13628 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13629 }
13630
13631 for (i = 0; i < 16; i++)
13632 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13633
13634 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13635 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13636 break;
13637
13638 case V8HImode:
13639 for (i = 0; i < 4; i++)
13640 {
13641 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13642 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13643 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13644 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13645 }
13646
13647 for (i = 0; i < 16; i++)
13648 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13649
13650 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13651 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13652 break;
13653
13654 case V4SImode:
13655 for (i = 0; i < 2; i++)
13656 {
13657 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13658 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13659 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13660 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13661 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13662 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13663 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13664 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13665 }
13666
13667 for (i = 0; i < 16; i++)
13668 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13669
13670 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13671 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13672 break;
13673
13674 default:
13675 gcc_unreachable ();
13676 }
13677
13678 return;
13679 }
13680
13681 /* Expand conditional increment or decrement using adc/sbb instructions.
13682 The default case using setcc followed by the conditional move can be
13683 done by generic code. */
13684 int
13685 ix86_expand_int_addcc (rtx operands[])
13686 {
13687 enum rtx_code code = GET_CODE (operands[1]);
13688 rtx compare_op;
13689 rtx val = const0_rtx;
13690 bool fpcmp = false;
13691 enum machine_mode mode = GET_MODE (operands[0]);
13692
13693 if (operands[3] != const1_rtx
13694 && operands[3] != constm1_rtx)
13695 return 0;
13696 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13697 ix86_compare_op1, &compare_op))
13698 return 0;
13699 code = GET_CODE (compare_op);
13700
13701 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13702 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13703 {
13704 fpcmp = true;
13705 code = ix86_fp_compare_code_to_integer (code);
13706 }
13707
13708 if (code != LTU)
13709 {
13710 val = constm1_rtx;
13711 if (fpcmp)
13712 PUT_CODE (compare_op,
13713 reverse_condition_maybe_unordered
13714 (GET_CODE (compare_op)));
13715 else
13716 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13717 }
13718 PUT_MODE (compare_op, mode);
13719
13720 /* Construct either adc or sbb insn. */
13721 if ((code == LTU) == (operands[3] == constm1_rtx))
13722 {
13723 switch (GET_MODE (operands[0]))
13724 {
13725 case QImode:
13726 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13727 break;
13728 case HImode:
13729 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13730 break;
13731 case SImode:
13732 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13733 break;
13734 case DImode:
13735 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13736 break;
13737 default:
13738 gcc_unreachable ();
13739 }
13740 }
13741 else
13742 {
13743 switch (GET_MODE (operands[0]))
13744 {
13745 case QImode:
13746 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13747 break;
13748 case HImode:
13749 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13750 break;
13751 case SImode:
13752 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13753 break;
13754 case DImode:
13755 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13756 break;
13757 default:
13758 gcc_unreachable ();
13759 }
13760 }
13761 return 1; /* DONE */
13762 }
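
/* Sketch of the intended output (illustrative): for
     x = (a < b) ? x + 1 : x
   with an unsigned comparison the expander above emits roughly
     cmpl  b, a
     adcl  $0, x
   because the carry flag holds the comparison result; the decrement form
   uses sbbl $0, x instead.  No setcc or cmov is needed.  */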
13763
13764
13765 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13766    works for floating point parameters and non-offsettable memories.
13767 For pushes, it returns just stack offsets; the values will be saved
13768 in the right order. Maximally three parts are generated. */
13769
13770 static int
13771 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13772 {
13773 int size;
13774
13775 if (!TARGET_64BIT)
13776 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13777 else
13778 size = (GET_MODE_SIZE (mode) + 4) / 8;
13779
13780 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13781 gcc_assert (size >= 2 && size <= 3);
13782
13783 /* Optimize constant pool reference to immediates. This is used by fp
13784 moves, that force all constants to memory to allow combining. */
13785 if (MEM_P (operand) && MEM_READONLY_P (operand))
13786 {
13787 rtx tmp = maybe_get_pool_constant (operand);
13788 if (tmp)
13789 operand = tmp;
13790 }
13791
13792 if (MEM_P (operand) && !offsettable_memref_p (operand))
13793 {
13794       /* The only non-offsettable memories we handle are pushes.  */
13795 int ok = push_operand (operand, VOIDmode);
13796
13797 gcc_assert (ok);
13798
13799 operand = copy_rtx (operand);
13800 PUT_MODE (operand, Pmode);
13801 parts[0] = parts[1] = parts[2] = operand;
13802 return size;
13803 }
13804
13805 if (GET_CODE (operand) == CONST_VECTOR)
13806 {
13807 enum machine_mode imode = int_mode_for_mode (mode);
13808 /* Caution: if we looked through a constant pool memory above,
13809 the operand may actually have a different mode now. That's
13810 ok, since we want to pun this all the way back to an integer. */
13811 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13812 gcc_assert (operand != NULL);
13813 mode = imode;
13814 }
13815
13816 if (!TARGET_64BIT)
13817 {
13818 if (mode == DImode)
13819 split_di (&operand, 1, &parts[0], &parts[1]);
13820 else
13821 {
13822 if (REG_P (operand))
13823 {
13824 gcc_assert (reload_completed);
13825 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13826 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13827 if (size == 3)
13828 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13829 }
13830 else if (offsettable_memref_p (operand))
13831 {
13832 operand = adjust_address (operand, SImode, 0);
13833 parts[0] = operand;
13834 parts[1] = adjust_address (operand, SImode, 4);
13835 if (size == 3)
13836 parts[2] = adjust_address (operand, SImode, 8);
13837 }
13838 else if (GET_CODE (operand) == CONST_DOUBLE)
13839 {
13840 REAL_VALUE_TYPE r;
13841 long l[4];
13842
13843 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13844 switch (mode)
13845 {
13846 case XFmode:
13847 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13848 parts[2] = gen_int_mode (l[2], SImode);
13849 break;
13850 case DFmode:
13851 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13852 break;
13853 default:
13854 gcc_unreachable ();
13855 }
13856 parts[1] = gen_int_mode (l[1], SImode);
13857 parts[0] = gen_int_mode (l[0], SImode);
13858 }
13859 else
13860 gcc_unreachable ();
13861 }
13862 }
13863 else
13864 {
13865 if (mode == TImode)
13866 split_ti (&operand, 1, &parts[0], &parts[1]);
13867 if (mode == XFmode || mode == TFmode)
13868 {
13869 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13870 if (REG_P (operand))
13871 {
13872 gcc_assert (reload_completed);
13873 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13874 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13875 }
13876 else if (offsettable_memref_p (operand))
13877 {
13878 operand = adjust_address (operand, DImode, 0);
13879 parts[0] = operand;
13880 parts[1] = adjust_address (operand, upper_mode, 8);
13881 }
13882 else if (GET_CODE (operand) == CONST_DOUBLE)
13883 {
13884 REAL_VALUE_TYPE r;
13885 long l[4];
13886
13887 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13888 real_to_target (l, &r, mode);
13889
13890 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13891 if (HOST_BITS_PER_WIDE_INT >= 64)
13892 parts[0]
13893 = gen_int_mode
13894 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13895 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13896 DImode);
13897 else
13898 parts[0] = immed_double_const (l[0], l[1], DImode);
13899
13900 if (upper_mode == SImode)
13901 parts[1] = gen_int_mode (l[2], SImode);
13902 else if (HOST_BITS_PER_WIDE_INT >= 64)
13903 parts[1]
13904 = gen_int_mode
13905 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13906 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13907 DImode);
13908 else
13909 parts[1] = immed_double_const (l[2], l[3], DImode);
13910 }
13911 else
13912 gcc_unreachable ();
13913 }
13914 }
13915
13916 return size;
13917 }
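
/* Example (illustrative): on a 32-bit target a DFmode constant is converted
   with REAL_VALUE_TO_TARGET_DOUBLE and the two 32-bit words l[0] and l[1]
   are returned as SImode immediates in parts[0] and parts[1], so the value
   can be stored or pushed as ordinary integers.  */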
13918
13919 /* Emit insns to perform a move or push of DI, DF, and XF values;
13920    all required insns are emitted directly, so no normal moves are
13921    left to the caller.  Operands 2-4 receive the destination parts
13922    in the correct order; operands 5-7 receive the source parts.  */
13923
13924 void
13925 ix86_split_long_move (rtx operands[])
13926 {
13927 rtx part[2][3];
13928 int nparts;
13929 int push = 0;
13930 int collisions = 0;
13931 enum machine_mode mode = GET_MODE (operands[0]);
13932
13933   /* The DFmode expanders may ask us to move a double.
13934      For a 64-bit target this is a single move.  By hiding that fact
13935      here we simplify the i386.md splitters.  */
13936 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13937 {
13938 /* Optimize constant pool reference to immediates. This is used by
13939 fp moves, that force all constants to memory to allow combining. */
13940
13941 if (MEM_P (operands[1])
13942 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13943 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13944 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13945 if (push_operand (operands[0], VOIDmode))
13946 {
13947 operands[0] = copy_rtx (operands[0]);
13948 PUT_MODE (operands[0], Pmode);
13949 }
13950 else
13951 operands[0] = gen_lowpart (DImode, operands[0]);
13952 operands[1] = gen_lowpart (DImode, operands[1]);
13953 emit_move_insn (operands[0], operands[1]);
13954 return;
13955 }
13956
13957 /* The only non-offsettable memory we handle is push. */
13958 if (push_operand (operands[0], VOIDmode))
13959 push = 1;
13960 else
13961 gcc_assert (!MEM_P (operands[0])
13962 || offsettable_memref_p (operands[0]));
13963
13964 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13965 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13966
13967 /* When emitting push, take care for source operands on the stack. */
13968 if (push && MEM_P (operands[1])
13969 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13970 {
13971 if (nparts == 3)
13972 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13973 XEXP (part[1][2], 0));
13974 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13975 XEXP (part[1][1], 0));
13976 }
13977
13978   /* We need to do the copy in the right order in case an address register
13979 of the source overlaps the destination. */
13980 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13981 {
13982 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13983 collisions++;
13984 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13985 collisions++;
13986 if (nparts == 3
13987 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13988 collisions++;
13989
13990 /* Collision in the middle part can be handled by reordering. */
13991 if (collisions == 1 && nparts == 3
13992 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13993 {
13994 rtx tmp;
13995 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13996 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13997 }
13998
13999 /* If there are more collisions, we can't handle it by reordering.
14000 Do an lea to the last part and use only one colliding move. */
14001 else if (collisions > 1)
14002 {
14003 rtx base;
14004
14005 collisions = 1;
14006
14007 base = part[0][nparts - 1];
14008
14009 /* Handle the case when the last part isn't valid for lea.
14010 Happens in 64-bit mode storing the 12-byte XFmode. */
14011 if (GET_MODE (base) != Pmode)
14012 base = gen_rtx_REG (Pmode, REGNO (base));
14013
14014 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14015 part[1][0] = replace_equiv_address (part[1][0], base);
14016 part[1][1] = replace_equiv_address (part[1][1],
14017 plus_constant (base, UNITS_PER_WORD));
14018 if (nparts == 3)
14019 part[1][2] = replace_equiv_address (part[1][2],
14020 plus_constant (base, 8));
14021 }
14022 }
14023
14024 if (push)
14025 {
14026 if (!TARGET_64BIT)
14027 {
14028 if (nparts == 3)
14029 {
14030 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14031 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14032 emit_move_insn (part[0][2], part[1][2]);
14033 }
14034 }
14035 else
14036 {
14037 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
14038 	     register, that is OK - we will just use the larger counterpart.  We also
14039 	     retype the memory - this comes from an attempt to avoid the REX prefix
14040 	     when moving the second half of a TFmode value.  */
14041 if (GET_MODE (part[1][1]) == SImode)
14042 {
14043 switch (GET_CODE (part[1][1]))
14044 {
14045 case MEM:
14046 part[1][1] = adjust_address (part[1][1], DImode, 0);
14047 break;
14048
14049 case REG:
14050 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14051 break;
14052
14053 default:
14054 gcc_unreachable ();
14055 }
14056
14057 if (GET_MODE (part[1][0]) == SImode)
14058 part[1][0] = part[1][1];
14059 }
14060 }
14061 emit_move_insn (part[0][1], part[1][1]);
14062 emit_move_insn (part[0][0], part[1][0]);
14063 return;
14064 }
14065
14066 /* Choose correct order to not overwrite the source before it is copied. */
14067 if ((REG_P (part[0][0])
14068 && REG_P (part[1][1])
14069 && (REGNO (part[0][0]) == REGNO (part[1][1])
14070 || (nparts == 3
14071 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14072 || (collisions > 0
14073 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14074 {
14075 if (nparts == 3)
14076 {
14077 operands[2] = part[0][2];
14078 operands[3] = part[0][1];
14079 operands[4] = part[0][0];
14080 operands[5] = part[1][2];
14081 operands[6] = part[1][1];
14082 operands[7] = part[1][0];
14083 }
14084 else
14085 {
14086 operands[2] = part[0][1];
14087 operands[3] = part[0][0];
14088 operands[5] = part[1][1];
14089 operands[6] = part[1][0];
14090 }
14091 }
14092 else
14093 {
14094 if (nparts == 3)
14095 {
14096 operands[2] = part[0][0];
14097 operands[3] = part[0][1];
14098 operands[4] = part[0][2];
14099 operands[5] = part[1][0];
14100 operands[6] = part[1][1];
14101 operands[7] = part[1][2];
14102 }
14103 else
14104 {
14105 operands[2] = part[0][0];
14106 operands[3] = part[0][1];
14107 operands[5] = part[1][0];
14108 operands[6] = part[1][1];
14109 }
14110 }
14111
14112 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14113 if (optimize_size)
14114 {
14115 if (CONST_INT_P (operands[5])
14116 && operands[5] != const0_rtx
14117 && REG_P (operands[2]))
14118 {
14119 if (CONST_INT_P (operands[6])
14120 && INTVAL (operands[6]) == INTVAL (operands[5]))
14121 operands[6] = operands[2];
14122
14123 if (nparts == 3
14124 && CONST_INT_P (operands[7])
14125 && INTVAL (operands[7]) == INTVAL (operands[5]))
14126 operands[7] = operands[2];
14127 }
14128
14129 if (nparts == 3
14130 && CONST_INT_P (operands[6])
14131 && operands[6] != const0_rtx
14132 && REG_P (operands[3])
14133 && CONST_INT_P (operands[7])
14134 && INTVAL (operands[7]) == INTVAL (operands[6]))
14135 operands[7] = operands[3];
14136 }
14137
14138 emit_move_insn (operands[2], operands[5]);
14139 emit_move_insn (operands[3], operands[6]);
14140 if (nparts == 3)
14141 emit_move_insn (operands[4], operands[7]);
14142
14143 return;
14144 }
14145
14146 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14147 left shift by a constant, either using a single shift or
14148 a sequence of add instructions. */
14149
14150 static void
14151 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14152 {
14153 if (count == 1)
14154 {
14155 emit_insn ((mode == DImode
14156 ? gen_addsi3
14157 : gen_adddi3) (operand, operand, operand));
14158 }
14159 else if (!optimize_size
14160 && count * ix86_cost->add <= ix86_cost->shift_const)
14161 {
14162 int i;
14163 for (i=0; i<count; i++)
14164 {
14165 emit_insn ((mode == DImode
14166 ? gen_addsi3
14167 : gen_adddi3) (operand, operand, operand));
14168 }
14169 }
14170 else
14171 emit_insn ((mode == DImode
14172 ? gen_ashlsi3
14173 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14174 }
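
/* Illustrative example: when not optimizing for size and
   2 * ix86_cost->add does not exceed ix86_cost->shift_const, a left shift
   of one part by 2 is emitted as two  addl %eax, %eax  instructions rather
   than a single  sall $2, %eax ; a shift by 1 is always a single add.  */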
14175
14176 void
14177 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14178 {
14179 rtx low[2], high[2];
14180 int count;
14181 const int single_width = mode == DImode ? 32 : 64;
14182
14183 if (CONST_INT_P (operands[2]))
14184 {
14185 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14186 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14187
14188 if (count >= single_width)
14189 {
14190 emit_move_insn (high[0], low[1]);
14191 emit_move_insn (low[0], const0_rtx);
14192
14193 if (count > single_width)
14194 ix86_expand_ashl_const (high[0], count - single_width, mode);
14195 }
14196 else
14197 {
14198 if (!rtx_equal_p (operands[0], operands[1]))
14199 emit_move_insn (operands[0], operands[1]);
14200 emit_insn ((mode == DImode
14201 ? gen_x86_shld_1
14202 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14203 ix86_expand_ashl_const (low[0], count, mode);
14204 }
14205 return;
14206 }
14207
14208 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14209
14210 if (operands[1] == const1_rtx)
14211 {
14212       /* Assuming we've chosen QImode-capable registers, then 1 << N
14213 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14214 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14215 {
14216 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14217
14218 ix86_expand_clear (low[0]);
14219 ix86_expand_clear (high[0]);
14220 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14221
14222 d = gen_lowpart (QImode, low[0]);
14223 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14224 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14225 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14226
14227 d = gen_lowpart (QImode, high[0]);
14228 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14229 s = gen_rtx_NE (QImode, flags, const0_rtx);
14230 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14231 }
14232
14233 /* Otherwise, we can get the same results by manually performing
14234 a bit extract operation on bit 5/6, and then performing the two
14235 shifts. The two methods of getting 0/1 into low/high are exactly
14236 the same size. Avoiding the shift in the bit extract case helps
14237 pentium4 a bit; no one else seems to care much either way. */
14238 else
14239 {
14240 rtx x;
14241
14242 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14243 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14244 else
14245 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14246 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14247
14248 emit_insn ((mode == DImode
14249 ? gen_lshrsi3
14250 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14251 emit_insn ((mode == DImode
14252 ? gen_andsi3
14253 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14254 emit_move_insn (low[0], high[0]);
14255 emit_insn ((mode == DImode
14256 ? gen_xorsi3
14257 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14258 }
14259
14260 emit_insn ((mode == DImode
14261 ? gen_ashlsi3
14262 : gen_ashldi3) (low[0], low[0], operands[2]));
14263 emit_insn ((mode == DImode
14264 ? gen_ashlsi3
14265 : gen_ashldi3) (high[0], high[0], operands[2]));
14266 return;
14267 }
14268
14269 if (operands[1] == constm1_rtx)
14270 {
14271 /* For -1 << N, we can avoid the shld instruction, because we
14272 know that we're shifting 0...31/63 ones into a -1. */
14273 emit_move_insn (low[0], constm1_rtx);
14274 if (optimize_size)
14275 emit_move_insn (high[0], low[0]);
14276 else
14277 emit_move_insn (high[0], constm1_rtx);
14278 }
14279 else
14280 {
14281 if (!rtx_equal_p (operands[0], operands[1]))
14282 emit_move_insn (operands[0], operands[1]);
14283
14284 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14285 emit_insn ((mode == DImode
14286 ? gen_x86_shld_1
14287 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14288 }
14289
14290 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14291
14292 if (TARGET_CMOVE && scratch)
14293 {
14294 ix86_expand_clear (scratch);
14295 emit_insn ((mode == DImode
14296 ? gen_x86_shift_adj_1
14297 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14298 }
14299 else
14300 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14301 }
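
/* Worked example (illustrative): a DImode shift left by the constant 40 on
   a 32-bit target takes the count >= 32 path above: the low source word is
   moved into the high result word, the low result word is cleared, and the
   high word is then shifted left by the remaining 40 - 32 = 8 bits via
   ix86_expand_ashl_const.  For a variable count the shld/shift pair plus
   the final adjustment sequence is emitted instead.  */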
14302
14303 void
14304 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14305 {
14306 rtx low[2], high[2];
14307 int count;
14308 const int single_width = mode == DImode ? 32 : 64;
14309
14310 if (CONST_INT_P (operands[2]))
14311 {
14312 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14313 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14314
14315 if (count == single_width * 2 - 1)
14316 {
14317 emit_move_insn (high[0], high[1]);
14318 emit_insn ((mode == DImode
14319 ? gen_ashrsi3
14320 : gen_ashrdi3) (high[0], high[0],
14321 GEN_INT (single_width - 1)));
14322 emit_move_insn (low[0], high[0]);
14323
14324 }
14325 else if (count >= single_width)
14326 {
14327 emit_move_insn (low[0], high[1]);
14328 emit_move_insn (high[0], low[0]);
14329 emit_insn ((mode == DImode
14330 ? gen_ashrsi3
14331 : gen_ashrdi3) (high[0], high[0],
14332 GEN_INT (single_width - 1)));
14333 if (count > single_width)
14334 emit_insn ((mode == DImode
14335 ? gen_ashrsi3
14336 : gen_ashrdi3) (low[0], low[0],
14337 GEN_INT (count - single_width)));
14338 }
14339 else
14340 {
14341 if (!rtx_equal_p (operands[0], operands[1]))
14342 emit_move_insn (operands[0], operands[1]);
14343 emit_insn ((mode == DImode
14344 ? gen_x86_shrd_1
14345 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14346 emit_insn ((mode == DImode
14347 ? gen_ashrsi3
14348 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14349 }
14350 }
14351 else
14352 {
14353 if (!rtx_equal_p (operands[0], operands[1]))
14354 emit_move_insn (operands[0], operands[1]);
14355
14356 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14357
14358 emit_insn ((mode == DImode
14359 ? gen_x86_shrd_1
14360 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14361 emit_insn ((mode == DImode
14362 ? gen_ashrsi3
14363 : gen_ashrdi3) (high[0], high[0], operands[2]));
14364
14365 if (TARGET_CMOVE && scratch)
14366 {
14367 emit_move_insn (scratch, high[0]);
14368 emit_insn ((mode == DImode
14369 ? gen_ashrsi3
14370 : gen_ashrdi3) (scratch, scratch,
14371 GEN_INT (single_width - 1)));
14372 emit_insn ((mode == DImode
14373 ? gen_x86_shift_adj_1
14374 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14375 scratch));
14376 }
14377 else
14378 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14379 }
14380 }
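
/* Illustrative example: a DImode arithmetic shift right by 63 on a 32-bit
   target hits the first special case above and reduces to broadcasting the
   sign bit:
     movl  high_src, high
     sarl  $31, high
     movl  high, low
   so both result words become 0 or -1 according to the sign of the input.  */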
14381
14382 void
14383 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14384 {
14385 rtx low[2], high[2];
14386 int count;
14387 const int single_width = mode == DImode ? 32 : 64;
14388
14389 if (CONST_INT_P (operands[2]))
14390 {
14391 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14392 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14393
14394 if (count >= single_width)
14395 {
14396 emit_move_insn (low[0], high[1]);
14397 ix86_expand_clear (high[0]);
14398
14399 if (count > single_width)
14400 emit_insn ((mode == DImode
14401 ? gen_lshrsi3
14402 : gen_lshrdi3) (low[0], low[0],
14403 GEN_INT (count - single_width)));
14404 }
14405 else
14406 {
14407 if (!rtx_equal_p (operands[0], operands[1]))
14408 emit_move_insn (operands[0], operands[1]);
14409 emit_insn ((mode == DImode
14410 ? gen_x86_shrd_1
14411 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14412 emit_insn ((mode == DImode
14413 ? gen_lshrsi3
14414 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14415 }
14416 }
14417 else
14418 {
14419 if (!rtx_equal_p (operands[0], operands[1]))
14420 emit_move_insn (operands[0], operands[1]);
14421
14422 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14423
14424 emit_insn ((mode == DImode
14425 ? gen_x86_shrd_1
14426 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14427 emit_insn ((mode == DImode
14428 ? gen_lshrsi3
14429 : gen_lshrdi3) (high[0], high[0], operands[2]));
14430
14431 /* Heh. By reversing the arguments, we can reuse this pattern. */
14432 if (TARGET_CMOVE && scratch)
14433 {
14434 ix86_expand_clear (scratch);
14435 emit_insn ((mode == DImode
14436 ? gen_x86_shift_adj_1
14437 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14438 scratch));
14439 }
14440 else
14441 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14442 }
14443 }
14444
14445 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
14446 static void
14447 predict_jump (int prob)
14448 {
14449 rtx insn = get_last_insn ();
14450 gcc_assert (JUMP_P (insn));
14451 REG_NOTES (insn)
14452 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14453 GEN_INT (prob),
14454 REG_NOTES (insn));
14455 }
14456
14457 /* Helper function for the string operations below.  Test whether VARIABLE
14458    is aligned to VALUE bytes.  If so, jump to the label.  */
14459 static rtx
14460 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14461 {
14462 rtx label = gen_label_rtx ();
14463 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14464 if (GET_MODE (variable) == DImode)
14465 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14466 else
14467 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14468 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14469 1, label);
14470 if (epilogue)
14471 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14472 else
14473 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14474 return label;
14475 }
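
/* Usage sketch (illustrative): the epilogue code below calls e.g.
     label = ix86_expand_aligntest (count, 4, true);
   which ANDs COUNT with 4 and jumps to LABEL when that bit is clear, so
   the 4-byte copy emitted between the call and emit_label (label) runs
   only when the remaining byte count still contains a 4-byte chunk.  */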
14476
14477 /* Decrease COUNTREG by VALUE.  */
14478 static void
14479 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14480 {
14481 if (GET_MODE (countreg) == DImode)
14482 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14483 else
14484 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14485 }
14486
14487 /* Zero-extend EXP, which may be in SImode, into a Pmode register.  */
14488 rtx
14489 ix86_zero_extend_to_Pmode (rtx exp)
14490 {
14491 rtx r;
14492 if (GET_MODE (exp) == VOIDmode)
14493 return force_reg (Pmode, exp);
14494 if (GET_MODE (exp) == Pmode)
14495 return copy_to_mode_reg (Pmode, exp);
14496 r = gen_reg_rtx (Pmode);
14497 emit_insn (gen_zero_extendsidi2 (r, exp));
14498 return r;
14499 }
14500
14501 /* Divide COUNTREG by SCALE. */
14502 static rtx
14503 scale_counter (rtx countreg, int scale)
14504 {
14505 rtx sc;
14506 rtx piece_size_mask;
14507
14508 if (scale == 1)
14509 return countreg;
14510 if (CONST_INT_P (countreg))
14511 return GEN_INT (INTVAL (countreg) / scale);
14512 gcc_assert (REG_P (countreg));
14513
14514 piece_size_mask = GEN_INT (scale - 1);
14515 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14516 GEN_INT (exact_log2 (scale)),
14517 NULL, 1, OPTAB_DIRECT);
14518 return sc;
14519 }
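
/* Example (illustrative): copying 37 bytes in SImode chunks calls
   scale_counter (count, 4); for a constant count this folds to
   GEN_INT (9), and for a register it emits a logical shift right by 2,
   leaving the remaining 37 % 4 = 1 byte to the epilogue code.  */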
14520
14521 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14522 DImode for constant loop counts. */
14523
14524 static enum machine_mode
14525 counter_mode (rtx count_exp)
14526 {
14527 if (GET_MODE (count_exp) != VOIDmode)
14528 return GET_MODE (count_exp);
14529 if (GET_CODE (count_exp) != CONST_INT)
14530 return Pmode;
14531 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14532 return DImode;
14533 return SImode;
14534 }
14535
14536 /* When SRCPTR is non-NULL, output a simple loop that copies memory
14537    from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
14538    the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
14539    output the equivalent loop that sets memory to VALUE (assumed to be in MODE).
14540
14541    The size is rounded down to a whole multiple of the chunk size moved at once.
14542    SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info.  */
14543
14544
14545 static void
14546 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14547 rtx destptr, rtx srcptr, rtx value,
14548 rtx count, enum machine_mode mode, int unroll,
14549 int expected_size)
14550 {
14551 rtx out_label, top_label, iter, tmp;
14552 enum machine_mode iter_mode = counter_mode (count);
14553 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14554 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14555 rtx size;
14556 rtx x_addr;
14557 rtx y_addr;
14558 int i;
14559
14560 top_label = gen_label_rtx ();
14561 out_label = gen_label_rtx ();
14562 iter = gen_reg_rtx (iter_mode);
14563
14564 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14565 NULL, 1, OPTAB_DIRECT);
14566 /* Those two should combine. */
14567 if (piece_size == const1_rtx)
14568 {
14569 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14570 true, out_label);
14571 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14572 }
14573 emit_move_insn (iter, const0_rtx);
14574
14575 emit_label (top_label);
14576
14577 tmp = convert_modes (Pmode, iter_mode, iter, true);
14578 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14579 destmem = change_address (destmem, mode, x_addr);
14580
14581 if (srcmem)
14582 {
14583 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14584 srcmem = change_address (srcmem, mode, y_addr);
14585
14586       /* When unrolling for chips that reorder memory reads and writes,
14587 	 we can save registers by using a single temporary.
14588 	 Also, using 4 temporaries is overkill in 32-bit mode.  */
14589 if (!TARGET_64BIT && 0)
14590 {
14591 for (i = 0; i < unroll; i++)
14592 {
14593 if (i)
14594 {
14595 destmem =
14596 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14597 srcmem =
14598 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14599 }
14600 emit_move_insn (destmem, srcmem);
14601 }
14602 }
14603 else
14604 {
14605 rtx tmpreg[4];
14606 gcc_assert (unroll <= 4);
14607 for (i = 0; i < unroll; i++)
14608 {
14609 tmpreg[i] = gen_reg_rtx (mode);
14610 if (i)
14611 {
14612 srcmem =
14613 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14614 }
14615 emit_move_insn (tmpreg[i], srcmem);
14616 }
14617 for (i = 0; i < unroll; i++)
14618 {
14619 if (i)
14620 {
14621 destmem =
14622 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14623 }
14624 emit_move_insn (destmem, tmpreg[i]);
14625 }
14626 }
14627 }
14628 else
14629 for (i = 0; i < unroll; i++)
14630 {
14631 if (i)
14632 destmem =
14633 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14634 emit_move_insn (destmem, value);
14635 }
14636
14637 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14638 true, OPTAB_LIB_WIDEN);
14639 if (tmp != iter)
14640 emit_move_insn (iter, tmp);
14641
14642 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14643 true, top_label);
14644 if (expected_size != -1)
14645 {
14646 expected_size /= GET_MODE_SIZE (mode) * unroll;
14647 if (expected_size == 0)
14648 predict_jump (0);
14649 else if (expected_size > REG_BR_PROB_BASE)
14650 predict_jump (REG_BR_PROB_BASE - 1);
14651 else
14652 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14653 }
14654 else
14655 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14656 iter = ix86_zero_extend_to_Pmode (iter);
14657 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14658 true, OPTAB_LIB_WIDEN);
14659 if (tmp != destptr)
14660 emit_move_insn (destptr, tmp);
14661 if (srcptr)
14662 {
14663 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14664 true, OPTAB_LIB_WIDEN);
14665 if (tmp != srcptr)
14666 emit_move_insn (srcptr, tmp);
14667 }
14668 emit_label (out_label);
14669 }
14670
14671 /* Output "rep; mov" instruction.
14672 Arguments have same meaning as for previous function */
14673 static void
14674 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14675 rtx destptr, rtx srcptr,
14676 rtx count,
14677 enum machine_mode mode)
14678 {
14679 rtx destexp;
14680 rtx srcexp;
14681 rtx countreg;
14682
14683   /* If the size is known and a multiple of 4, it is shorter to use SImode rep movs.  */
14684 if (mode == QImode && CONST_INT_P (count)
14685 && !(INTVAL (count) & 3))
14686 mode = SImode;
14687
14688 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14689 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14690 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14691 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14692 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14693 if (mode != QImode)
14694 {
14695 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14696 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14697 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14698 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14699 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14700 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14701 }
14702 else
14703 {
14704 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14705 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14706 }
14707 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14708 destexp, srcexp));
14709 }
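
/* Illustrative sketch: for an SImode rep move the code above scales the
   byte count down by 4 and describes the final pointer values as
     destexp = destptr + (countreg << 2)
     srcexp  = srcptr  + (countreg << 2)
   which, together with the BLKmode memories, is the RTL shape gen_rep_mov
   uses to model a  rep movsl  style copy.  */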
14710
14711 /* Output "rep; stos" instruction.
14712 Arguments have same meaning as for previous function */
14713 static void
14714 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14715 rtx count,
14716 enum machine_mode mode)
14717 {
14718 rtx destexp;
14719 rtx countreg;
14720
14721 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14722 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14723 value = force_reg (mode, gen_lowpart (mode, value));
14724 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14725 if (mode != QImode)
14726 {
14727 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14728 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14729 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14730 }
14731 else
14732 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14733 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14734 }
14735
14736 static void
14737 emit_strmov (rtx destmem, rtx srcmem,
14738 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14739 {
14740 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14741 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14742 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14743 }
14744
14745 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
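/* For instance, with a constant COUNT whose low bits are 13 (0b1101) and
MAX_SIZE == 16, a 64-bit target emits one 8-byte, one 4-byte and one
1-byte move (13 bytes in total), walking the bits of the residual count
from the highest to the lowest.  */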
14746 static void
14747 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14748 rtx destptr, rtx srcptr, rtx count, int max_size)
14749 {
14750 rtx src, dest;
14751 if (CONST_INT_P (count))
14752 {
14753 HOST_WIDE_INT countval = INTVAL (count);
14754 int offset = 0;
14755
14756 if ((countval & 0x10) && max_size > 16)
14757 {
14758 if (TARGET_64BIT)
14759 {
14760 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14761 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14762 }
14763 else
14764 gcc_unreachable ();
14765 offset += 16;
14766 }
14767 if ((countval & 0x08) && max_size > 8)
14768 {
14769 if (TARGET_64BIT)
14770 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14771 else
14772 {
14773 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14774 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14775 }
14776 offset += 8;
14777 }
14778 if ((countval & 0x04) && max_size > 4)
14779 {
14780 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14781 offset += 4;
14782 }
14783 if ((countval & 0x02) && max_size > 2)
14784 {
14785 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14786 offset += 2;
14787 }
14788 if ((countval & 0x01) && max_size > 1)
14789 {
14790 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14791 offset += 1;
14792 }
14793 return;
14794 }
14795 if (max_size > 8)
14796 {
14797 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14798 count, 1, OPTAB_DIRECT);
14799 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14800 count, QImode, 1, 4);
14801 return;
14802 }
14803
14804 /* When single string operations (TARGET_SINGLE_STRINGOP) are available, we can
14805 cheaply advance the dest and src pointers. Otherwise we save code size by
14806 maintaining an offset (zero is readily available from the preceding rep
14807 operation) and using x86 addressing modes.  */
14808 if (TARGET_SINGLE_STRINGOP)
14809 {
14810 if (max_size > 4)
14811 {
14812 rtx label = ix86_expand_aligntest (count, 4, true);
14813 src = change_address (srcmem, SImode, srcptr);
14814 dest = change_address (destmem, SImode, destptr);
14815 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14816 emit_label (label);
14817 LABEL_NUSES (label) = 1;
14818 }
14819 if (max_size > 2)
14820 {
14821 rtx label = ix86_expand_aligntest (count, 2, true);
14822 src = change_address (srcmem, HImode, srcptr);
14823 dest = change_address (destmem, HImode, destptr);
14824 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14825 emit_label (label);
14826 LABEL_NUSES (label) = 1;
14827 }
14828 if (max_size > 1)
14829 {
14830 rtx label = ix86_expand_aligntest (count, 1, true);
14831 src = change_address (srcmem, QImode, srcptr);
14832 dest = change_address (destmem, QImode, destptr);
14833 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14834 emit_label (label);
14835 LABEL_NUSES (label) = 1;
14836 }
14837 }
14838 else
14839 {
14840 rtx offset = force_reg (Pmode, const0_rtx);
14841 rtx tmp;
14842
14843 if (max_size > 4)
14844 {
14845 rtx label = ix86_expand_aligntest (count, 4, true);
14846 src = change_address (srcmem, SImode, srcptr);
14847 dest = change_address (destmem, SImode, destptr);
14848 emit_move_insn (dest, src);
14849 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14850 true, OPTAB_LIB_WIDEN);
14851 if (tmp != offset)
14852 emit_move_insn (offset, tmp);
14853 emit_label (label);
14854 LABEL_NUSES (label) = 1;
14855 }
14856 if (max_size > 2)
14857 {
14858 rtx label = ix86_expand_aligntest (count, 2, true);
14859 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14860 src = change_address (srcmem, HImode, tmp);
14861 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14862 dest = change_address (destmem, HImode, tmp);
14863 emit_move_insn (dest, src);
14864 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14865 true, OPTAB_LIB_WIDEN);
14866 if (tmp != offset)
14867 emit_move_insn (offset, tmp);
14868 emit_label (label);
14869 LABEL_NUSES (label) = 1;
14870 }
14871 if (max_size > 1)
14872 {
14873 rtx label = ix86_expand_aligntest (count, 1, true);
14874 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14875 src = change_address (srcmem, QImode, tmp);
14876 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14877 dest = change_address (destmem, QImode, tmp);
14878 emit_move_insn (dest, src);
14879 emit_label (label);
14880 LABEL_NUSES (label) = 1;
14881 }
14882 }
14883 }
14884
14885 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14886 static void
14887 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14888 rtx count, int max_size)
14889 {
14890 count =
14891 expand_simple_binop (counter_mode (count), AND, count,
14892 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14893 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14894 gen_lowpart (QImode, value), count, QImode,
14895 1, max_size / 2);
14896 }
14897
14898 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14899 static void
14900 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14901 {
14902 rtx dest;
14903
14904 if (CONST_INT_P (count))
14905 {
14906 HOST_WIDE_INT countval = INTVAL (count);
14907 int offset = 0;
14908
14909 if ((countval & 0x10) && max_size > 16)
14910 {
14911 if (TARGET_64BIT)
14912 {
14913 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14914 emit_insn (gen_strset (destptr, dest, value));
14915 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14916 emit_insn (gen_strset (destptr, dest, value));
14917 }
14918 else
14919 gcc_unreachable ();
14920 offset += 16;
14921 }
14922 if ((countval & 0x08) && max_size > 8)
14923 {
14924 if (TARGET_64BIT)
14925 {
14926 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14927 emit_insn (gen_strset (destptr, dest, value));
14928 }
14929 else
14930 {
14931 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14932 emit_insn (gen_strset (destptr, dest, value));
14933 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14934 emit_insn (gen_strset (destptr, dest, value));
14935 }
14936 offset += 8;
14937 }
14938 if ((countval & 0x04) && max_size > 4)
14939 {
14940 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14941 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14942 offset += 4;
14943 }
14944 if ((countval & 0x02) && max_size > 2)
14945 {
14946 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14947 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14948 offset += 2;
14949 }
14950 if ((countval & 0x01) && max_size > 1)
14951 {
14952 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14953 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14954 offset += 1;
14955 }
14956 return;
14957 }
14958 if (max_size > 32)
14959 {
14960 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14961 return;
14962 }
14963 if (max_size > 16)
14964 {
14965 rtx label = ix86_expand_aligntest (count, 16, true);
14966 if (TARGET_64BIT)
14967 {
14968 dest = change_address (destmem, DImode, destptr);
14969 emit_insn (gen_strset (destptr, dest, value));
14970 emit_insn (gen_strset (destptr, dest, value));
14971 }
14972 else
14973 {
14974 dest = change_address (destmem, SImode, destptr);
14975 emit_insn (gen_strset (destptr, dest, value));
14976 emit_insn (gen_strset (destptr, dest, value));
14977 emit_insn (gen_strset (destptr, dest, value));
14978 emit_insn (gen_strset (destptr, dest, value));
14979 }
14980 emit_label (label);
14981 LABEL_NUSES (label) = 1;
14982 }
14983 if (max_size > 8)
14984 {
14985 rtx label = ix86_expand_aligntest (count, 8, true);
14986 if (TARGET_64BIT)
14987 {
14988 dest = change_address (destmem, DImode, destptr);
14989 emit_insn (gen_strset (destptr, dest, value));
14990 }
14991 else
14992 {
14993 dest = change_address (destmem, SImode, destptr);
14994 emit_insn (gen_strset (destptr, dest, value));
14995 emit_insn (gen_strset (destptr, dest, value));
14996 }
14997 emit_label (label);
14998 LABEL_NUSES (label) = 1;
14999 }
15000 if (max_size > 4)
15001 {
15002 rtx label = ix86_expand_aligntest (count, 4, true);
15003 dest = change_address (destmem, SImode, destptr);
15004 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15005 emit_label (label);
15006 LABEL_NUSES (label) = 1;
15007 }
15008 if (max_size > 2)
15009 {
15010 rtx label = ix86_expand_aligntest (count, 2, true);
15011 dest = change_address (destmem, HImode, destptr);
15012 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15013 emit_label (label);
15014 LABEL_NUSES (label) = 1;
15015 }
15016 if (max_size > 1)
15017 {
15018 rtx label = ix86_expand_aligntest (count, 1, true);
15019 dest = change_address (destmem, QImode, destptr);
15020 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15021 emit_label (label);
15022 LABEL_NUSES (label) = 1;
15023 }
15024 }
15025
15026 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
15027 to ALIGN, up to DESIRED_ALIGNMENT. */
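/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits up to three
single-element copies (1, 2 and 4 bytes), each guarded by a test of the
corresponding low bit of DESTPTR, and adjusts COUNT accordingly.  */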
15028 static void
15029 expand_movmem_prologue (rtx destmem, rtx srcmem,
15030 rtx destptr, rtx srcptr, rtx count,
15031 int align, int desired_alignment)
15032 {
15033 if (align <= 1 && desired_alignment > 1)
15034 {
15035 rtx label = ix86_expand_aligntest (destptr, 1, false);
15036 srcmem = change_address (srcmem, QImode, srcptr);
15037 destmem = change_address (destmem, QImode, destptr);
15038 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15039 ix86_adjust_counter (count, 1);
15040 emit_label (label);
15041 LABEL_NUSES (label) = 1;
15042 }
15043 if (align <= 2 && desired_alignment > 2)
15044 {
15045 rtx label = ix86_expand_aligntest (destptr, 2, false);
15046 srcmem = change_address (srcmem, HImode, srcptr);
15047 destmem = change_address (destmem, HImode, destptr);
15048 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15049 ix86_adjust_counter (count, 2);
15050 emit_label (label);
15051 LABEL_NUSES (label) = 1;
15052 }
15053 if (align <= 4 && desired_alignment > 4)
15054 {
15055 rtx label = ix86_expand_aligntest (destptr, 4, false);
15056 srcmem = change_address (srcmem, SImode, srcptr);
15057 destmem = change_address (destmem, SImode, destptr);
15058 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15059 ix86_adjust_counter (count, 4);
15060 emit_label (label);
15061 LABEL_NUSES (label) = 1;
15062 }
15063 gcc_assert (desired_alignment <= 8);
15064 }
15065
15066 /* Store enough bytes at DEST to align DEST, known to be aligned to ALIGN,
15067 up to DESIRED_ALIGNMENT. */
15068 static void
15069 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15070 int align, int desired_alignment)
15071 {
15072 if (align <= 1 && desired_alignment > 1)
15073 {
15074 rtx label = ix86_expand_aligntest (destptr, 1, false);
15075 destmem = change_address (destmem, QImode, destptr);
15076 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15077 ix86_adjust_counter (count, 1);
15078 emit_label (label);
15079 LABEL_NUSES (label) = 1;
15080 }
15081 if (align <= 2 && desired_alignment > 2)
15082 {
15083 rtx label = ix86_expand_aligntest (destptr, 2, false);
15084 destmem = change_address (destmem, HImode, destptr);
15085 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15086 ix86_adjust_counter (count, 2);
15087 emit_label (label);
15088 LABEL_NUSES (label) = 1;
15089 }
15090 if (align <= 4 && desired_alignment > 4)
15091 {
15092 rtx label = ix86_expand_aligntest (destptr, 4, false);
15093 destmem = change_address (destmem, SImode, destptr);
15094 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15095 ix86_adjust_counter (count, 4);
15096 emit_label (label);
15097 LABEL_NUSES (label) = 1;
15098 }
15099 gcc_assert (desired_alignment <= 8);
15100 }
15101
15102 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
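/* On return *DYNAMIC_CHECK is -1, or, when -minline-stringops-dynamically
is in effect, a byte count above which the caller should emit a run-time
size check and fall back to a library call instead of the inline code.  */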
15103 static enum stringop_alg
15104 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15105 int *dynamic_check)
15106 {
15107 const struct stringop_algs * algs;
15108 /* Algorithms using the rep prefix want at least edi and ecx;
15109 additionally, memset wants eax and memcpy wants esi. Don't
15110 consider such algorithms if the user has appropriated those
15111 registers for their own purposes. */
15112 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15113 || (memset
15114 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15115
15116 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15117 || (alg != rep_prefix_1_byte \
15118 && alg != rep_prefix_4_byte \
15119 && alg != rep_prefix_8_byte))
15120
15121 *dynamic_check = -1;
15122 if (memset)
15123 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15124 else
15125 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15126 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15127 return stringop_alg;
15128 /* rep; movq or rep; movl is the smallest variant. */
15129 else if (optimize_size)
15130 {
15131 if (!count || (count & 3))
15132 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15133 else
15134 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15135 }
15136 /* Very tiny blocks are best handled via the loop; REP is expensive
15137 to set up.  */
15138 else if (expected_size != -1 && expected_size < 4)
15139 return loop_1_byte;
15140 else if (expected_size != -1)
15141 {
15142 unsigned int i;
15143 enum stringop_alg alg = libcall;
15144 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15145 {
15146 /* We get here if the algorithms that were not libcall-based
15147 were rep-prefix based and we are unable to use rep prefixes
15148 based on global register usage. Break out of the loop and
15149 use the heuristic below. */
15150 if (algs->size[i].max == 0)
15151 break;
15152 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15153 {
15154 enum stringop_alg candidate = algs->size[i].alg;
15155
15156 if (candidate != libcall && ALG_USABLE_P (candidate))
15157 alg = candidate;
15158 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15159 last non-libcall inline algorithm. */
15160 if (TARGET_INLINE_ALL_STRINGOPS)
15161 {
15162 /* When the current size is best copied by a libcall, but we are
15163 still forced to inline, run the heuristic below that will pick
15164 code for medium-sized blocks. */
15165 if (alg != libcall)
15166 return alg;
15167 break;
15168 }
15169 else if (ALG_USABLE_P (candidate))
15170 return candidate;
15171 }
15172 }
15173 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15174 }
15175 /* When asked to inline the call anyway, try to pick a meaningful choice.
15176 We look for the maximal size of block that is faster to copy by hand
15177 and take blocks of at most that size, guessing that the average size
15178 will be roughly half of the block.
15179 
15180 If this turns out to be bad, we might simply specify the preferred
15181 choice in ix86_costs. */
15182 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15183 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15184 {
15185 int max = -1;
15186 enum stringop_alg alg;
15187 int i;
15188 bool any_alg_usable_p = true;
15189
15190 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15191 {
15192 enum stringop_alg candidate = algs->size[i].alg;
15193 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15194
15195 if (candidate != libcall && candidate
15196 && ALG_USABLE_P (candidate))
15197 max = algs->size[i].max;
15198 }
15199 /* If there aren't any usable algorithms, then recursing on
15200 smaller sizes isn't going to find anything. Just return the
15201 simple byte-at-a-time copy loop. */
15202 if (!any_alg_usable_p)
15203 {
15204 /* Pick something reasonable. */
15205 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15206 *dynamic_check = 128;
15207 return loop_1_byte;
15208 }
15209 if (max == -1)
15210 max = 4096;
15211 alg = decide_alg (count, max / 2, memset, dynamic_check);
15212 gcc_assert (*dynamic_check == -1);
15213 gcc_assert (alg != libcall);
15214 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15215 *dynamic_check = max;
15216 return alg;
15217 }
15218 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15219 #undef ALG_USABLE_P
15220 }
15221
15222 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15223 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15224 static int
15225 decide_alignment (int align,
15226 enum stringop_alg alg,
15227 int expected_size)
15228 {
15229 int desired_align = 0;
15230 switch (alg)
15231 {
15232 case no_stringop:
15233 gcc_unreachable ();
15234 case loop:
15235 case unrolled_loop:
15236 desired_align = GET_MODE_SIZE (Pmode);
15237 break;
15238 case rep_prefix_8_byte:
15239 desired_align = 8;
15240 break;
15241 case rep_prefix_4_byte:
15242 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
15243 copying a whole cacheline at once. */
15244 if (TARGET_PENTIUMPRO)
15245 desired_align = 8;
15246 else
15247 desired_align = 4;
15248 break;
15249 case rep_prefix_1_byte:
15250 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
15251 copying a whole cacheline at once. */
15252 if (TARGET_PENTIUMPRO)
15253 desired_align = 8;
15254 else
15255 desired_align = 1;
15256 break;
15257 case loop_1_byte:
15258 desired_align = 1;
15259 break;
15260 case libcall:
15261 return 0;
15262 }
15263
15264 if (optimize_size)
15265 desired_align = 1;
15266 if (desired_align < align)
15267 desired_align = align;
15268 if (expected_size != -1 && expected_size < 4)
15269 desired_align = align;
15270 return desired_align;
15271 }
15272
15273 /* Return the smallest power of 2 greater than VAL. */
15274 static int
15275 smallest_pow2_greater_than (int val)
15276 {
15277 int ret = 1;
15278 while (ret <= val)
15279 ret <<= 1;
15280 return ret;
15281 }
15282
15283 /* Expand string move (memcpy) operation. Use i386 string operations when
15284 profitable. ix86_expand_setmem contains similar code. The code depends upon
15285 architecture, block size and alignment, but always has the same
15286 overall structure:
15287
15288 1) Prologue guard: Conditional that jumps up to epilogues for small
15289 blocks that can be handled by the epilogue alone. This is faster, but
15290 also needed for correctness, since the prologue assumes the block is
15291 larger than the desired alignment.
15292
15293 Optional dynamic check for size and libcall for large
15294 blocks is emitted here too, with -minline-stringops-dynamically.
15295
15296 2) Prologue: copy the first few bytes in order to get the destination
15297 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15298 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15299 We emit either a jump tree on power of two sized blocks, or a byte loop.
15300
15301 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15302 with specified algorithm.
15303
15304 4) Epilogue: code copying tail of the block that is too small to be
15305 handled by main body (or up to size guarded by prologue guard). */
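/* Roughly, the emitted code has the shape:
 
      if (count < epilogue_size_needed) goto epilogue;          (1)
      <optional run-time size check and library call>           (1)
      <copy up to desired_align - align bytes to align dest>    (2)
      <main loop copying size_needed-byte chunks>               (3)
    epilogue:
      <copy count & (epilogue_size_needed - 1) remaining bytes> (4)  */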
15306
15307 int
15308 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15309 rtx expected_align_exp, rtx expected_size_exp)
15310 {
15311 rtx destreg;
15312 rtx srcreg;
15313 rtx label = NULL;
15314 rtx tmp;
15315 rtx jump_around_label = NULL;
15316 HOST_WIDE_INT align = 1;
15317 unsigned HOST_WIDE_INT count = 0;
15318 HOST_WIDE_INT expected_size = -1;
15319 int size_needed = 0, epilogue_size_needed;
15320 int desired_align = 0;
15321 enum stringop_alg alg;
15322 int dynamic_check;
15323
15324 if (CONST_INT_P (align_exp))
15325 align = INTVAL (align_exp);
15326 /* i386 can do misaligned access at a reasonably increased cost. */
15327 if (CONST_INT_P (expected_align_exp)
15328 && INTVAL (expected_align_exp) > align)
15329 align = INTVAL (expected_align_exp);
15330 if (CONST_INT_P (count_exp))
15331 count = expected_size = INTVAL (count_exp);
15332 if (CONST_INT_P (expected_size_exp) && count == 0)
15333 expected_size = INTVAL (expected_size_exp);
15334
15335 /* Step 0: Decide on preferred algorithm, desired alignment and
15336 size of chunks to be copied by main loop. */
15337
15338 alg = decide_alg (count, expected_size, false, &dynamic_check);
15339 desired_align = decide_alignment (align, alg, expected_size);
15340
15341 if (!TARGET_ALIGN_STRINGOPS)
15342 align = desired_align;
15343
15344 if (alg == libcall)
15345 return 0;
15346 gcc_assert (alg != no_stringop);
15347 if (!count)
15348 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15349 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15350 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15351 switch (alg)
15352 {
15353 case libcall:
15354 case no_stringop:
15355 gcc_unreachable ();
15356 case loop:
15357 size_needed = GET_MODE_SIZE (Pmode);
15358 break;
15359 case unrolled_loop:
15360 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15361 break;
15362 case rep_prefix_8_byte:
15363 size_needed = 8;
15364 break;
15365 case rep_prefix_4_byte:
15366 size_needed = 4;
15367 break;
15368 case rep_prefix_1_byte:
15369 case loop_1_byte:
15370 size_needed = 1;
15371 break;
15372 }
15373
15374 epilogue_size_needed = size_needed;
15375
15376 /* Step 1: Prologue guard. */
15377
15378 /* Alignment code needs count to be in a register. */
15379 if (CONST_INT_P (count_exp) && desired_align > align)
15380 count_exp = force_reg (counter_mode (count_exp), count_exp);
15381 gcc_assert (desired_align >= 1 && align >= 1);
15382
15383 /* Ensure that alignment prologue won't copy past end of block. */
15384 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15385 {
15386 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15387 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15388 Make sure it is power of 2. */
15389 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15390
15391 if (CONST_INT_P (count_exp))
15392 {
15393 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15394 goto epilogue;
15395 }
15396 else
15397 {
15398 label = gen_label_rtx ();
15399 emit_cmp_and_jump_insns (count_exp,
15400 GEN_INT (epilogue_size_needed),
15401 LTU, 0, counter_mode (count_exp), 1, label);
15402 if (expected_size == -1 || expected_size < epilogue_size_needed)
15403 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15404 else
15405 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15406 }
15407 }
15408
15409 /* Emit code to decide at runtime whether a library call or inline code
15410 should be used. */
15411 if (dynamic_check != -1)
15412 {
15413 if (CONST_INT_P (count_exp))
15414 {
15415 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15416 {
15417 emit_block_move_via_libcall (dst, src, count_exp, false);
15418 count_exp = const0_rtx;
15419 goto epilogue;
15420 }
15421 }
15422 else
15423 {
15424 rtx hot_label = gen_label_rtx ();
15425 jump_around_label = gen_label_rtx ();
15426 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15427 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15428 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15429 emit_block_move_via_libcall (dst, src, count_exp, false);
15430 emit_jump (jump_around_label);
15431 emit_label (hot_label);
15432 }
15433 }
15434
15435 /* Step 2: Alignment prologue. */
15436
15437 if (desired_align > align)
15438 {
15439 /* Except for the first move in the epilogue, we no longer know
15440 the constant offset in the aliasing info. It doesn't seem worth
15441 the pain to maintain it for the first move, so throw away
15442 the info early. */
15443 src = change_address (src, BLKmode, srcreg);
15444 dst = change_address (dst, BLKmode, destreg);
15445 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15446 desired_align);
15447 }
15448 if (label && size_needed == 1)
15449 {
15450 emit_label (label);
15451 LABEL_NUSES (label) = 1;
15452 label = NULL;
15453 }
15454
15455 /* Step 3: Main loop. */
15456
15457 switch (alg)
15458 {
15459 case libcall:
15460 case no_stringop:
15461 gcc_unreachable ();
15462 case loop_1_byte:
15463 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15464 count_exp, QImode, 1, expected_size);
15465 break;
15466 case loop:
15467 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15468 count_exp, Pmode, 1, expected_size);
15469 break;
15470 case unrolled_loop:
15471 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
15472 registers for 4 temporaries anyway. */
15473 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15474 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15475 expected_size);
15476 break;
15477 case rep_prefix_8_byte:
15478 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15479 DImode);
15480 break;
15481 case rep_prefix_4_byte:
15482 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15483 SImode);
15484 break;
15485 case rep_prefix_1_byte:
15486 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15487 QImode);
15488 break;
15489 }
15490 /* Properly adjust the offsets of the src and dest memory for aliasing. */
15491 if (CONST_INT_P (count_exp))
15492 {
15493 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15494 (count / size_needed) * size_needed);
15495 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15496 (count / size_needed) * size_needed);
15497 }
15498 else
15499 {
15500 src = change_address (src, BLKmode, srcreg);
15501 dst = change_address (dst, BLKmode, destreg);
15502 }
15503
15504 /* Step 4: Epilogue to copy the remaining bytes. */
15505 epilogue:
15506 if (label)
15507 {
15508 /* When the main loop is done, COUNT_EXP might hold the original count,
15509 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15510 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15511 bytes. Compensate if needed. */
15512
15513 if (size_needed < epilogue_size_needed)
15514 {
15515 tmp =
15516 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15517 GEN_INT (size_needed - 1), count_exp, 1,
15518 OPTAB_DIRECT);
15519 if (tmp != count_exp)
15520 emit_move_insn (count_exp, tmp);
15521 }
15522 emit_label (label);
15523 LABEL_NUSES (label) = 1;
15524 }
15525
15526 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15527 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15528 epilogue_size_needed);
15529 if (jump_around_label)
15530 emit_label (jump_around_label);
15531 return 1;
15532 }
15533
15534 /* Helper function for memset. For the QImode value 0xXY produce
15535 0xXYXYXYXY of the width specified by MODE. This is essentially
15536 a multiplication by 0x01010101, but we can do slightly better than
15537 synth_mult by unwinding the sequence by hand on CPUs with
15538 a slow multiply. */
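/* For example, for VAL == 0xab and MODE == SImode the unwound sequence
below computes 0xab -> 0xabab -> 0xabababab, one doubling step at a time
(insv or shift plus IOR); DImode adds one more shift-by-32/IOR step.  */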
15539 static rtx
15540 promote_duplicated_reg (enum machine_mode mode, rtx val)
15541 {
15542 enum machine_mode valmode = GET_MODE (val);
15543 rtx tmp;
15544 int nops = mode == DImode ? 3 : 2;
15545
15546 gcc_assert (mode == SImode || mode == DImode);
15547 if (val == const0_rtx)
15548 return copy_to_mode_reg (mode, const0_rtx);
15549 if (CONST_INT_P (val))
15550 {
15551 HOST_WIDE_INT v = INTVAL (val) & 255;
15552
15553 v |= v << 8;
15554 v |= v << 16;
15555 if (mode == DImode)
15556 v |= (v << 16) << 16;
15557 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15558 }
15559
15560 if (valmode == VOIDmode)
15561 valmode = QImode;
15562 if (valmode != QImode)
15563 val = gen_lowpart (QImode, val);
15564 if (mode == QImode)
15565 return val;
15566 if (!TARGET_PARTIAL_REG_STALL)
15567 nops--;
15568 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15569 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15570 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15571 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15572 {
15573 rtx reg = convert_modes (mode, QImode, val, true);
15574 tmp = promote_duplicated_reg (mode, const1_rtx);
15575 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15576 OPTAB_DIRECT);
15577 }
15578 else
15579 {
15580 rtx reg = convert_modes (mode, QImode, val, true);
15581
15582 if (!TARGET_PARTIAL_REG_STALL)
15583 if (mode == SImode)
15584 emit_insn (gen_movsi_insv_1 (reg, reg));
15585 else
15586 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15587 else
15588 {
15589 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15590 NULL, 1, OPTAB_DIRECT);
15591 reg =
15592 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15593 }
15594 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15595 NULL, 1, OPTAB_DIRECT);
15596 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15597 if (mode == SImode)
15598 return reg;
15599 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15600 NULL, 1, OPTAB_DIRECT);
15601 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15602 return reg;
15603 }
15604 }
15605
15606 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
15607 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
15608 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
15609 static rtx
15610 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15611 {
15612 rtx promoted_val;
15613
15614 if (TARGET_64BIT
15615 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15616 promoted_val = promote_duplicated_reg (DImode, val);
15617 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15618 promoted_val = promote_duplicated_reg (SImode, val);
15619 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15620 promoted_val = promote_duplicated_reg (HImode, val);
15621 else
15622 promoted_val = val;
15623
15624 return promoted_val;
15625 }
15626
15627 /* Expand string set operation (memset, bzero). Use i386 string operations
15628 when profitable. See the ix86_expand_movmem comment for an explanation of
15629 the individual steps performed. */
15630 int
15631 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15632 rtx expected_align_exp, rtx expected_size_exp)
15633 {
15634 rtx destreg;
15635 rtx label = NULL;
15636 rtx tmp;
15637 rtx jump_around_label = NULL;
15638 HOST_WIDE_INT align = 1;
15639 unsigned HOST_WIDE_INT count = 0;
15640 HOST_WIDE_INT expected_size = -1;
15641 int size_needed = 0, epilogue_size_needed;
15642 int desired_align = 0;
15643 enum stringop_alg alg;
15644 rtx promoted_val = NULL;
15645 bool force_loopy_epilogue = false;
15646 int dynamic_check;
15647
15648 if (CONST_INT_P (align_exp))
15649 align = INTVAL (align_exp);
15650 /* i386 can do misaligned access at a reasonably increased cost. */
15651 if (CONST_INT_P (expected_align_exp)
15652 && INTVAL (expected_align_exp) > align)
15653 align = INTVAL (expected_align_exp);
15654 if (CONST_INT_P (count_exp))
15655 count = expected_size = INTVAL (count_exp);
15656 if (CONST_INT_P (expected_size_exp) && count == 0)
15657 expected_size = INTVAL (expected_size_exp);
15658
15659 /* Step 0: Decide on preferred algorithm, desired alignment and
15660 size of chunks to be copied by main loop. */
15661
15662 alg = decide_alg (count, expected_size, true, &dynamic_check);
15663 desired_align = decide_alignment (align, alg, expected_size);
15664
15665 if (!TARGET_ALIGN_STRINGOPS)
15666 align = desired_align;
15667
15668 if (alg == libcall)
15669 return 0;
15670 gcc_assert (alg != no_stringop);
15671 if (!count)
15672 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15673 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15674 switch (alg)
15675 {
15676 case libcall:
15677 case no_stringop:
15678 gcc_unreachable ();
15679 case loop:
15680 size_needed = GET_MODE_SIZE (Pmode);
15681 break;
15682 case unrolled_loop:
15683 size_needed = GET_MODE_SIZE (Pmode) * 4;
15684 break;
15685 case rep_prefix_8_byte:
15686 size_needed = 8;
15687 break;
15688 case rep_prefix_4_byte:
15689 size_needed = 4;
15690 break;
15691 case rep_prefix_1_byte:
15692 case loop_1_byte:
15693 size_needed = 1;
15694 break;
15695 }
15696 epilogue_size_needed = size_needed;
15697
15698 /* Step 1: Prologue guard. */
15699
15700 /* Alignment code needs count to be in a register. */
15701 if (CONST_INT_P (count_exp) && desired_align > align)
15702 {
15703 enum machine_mode mode = SImode;
15704 if (TARGET_64BIT && (count & ~0xffffffff))
15705 mode = DImode;
15706 count_exp = force_reg (mode, count_exp);
15707 }
15708 /* Do the cheap promotion to allow better CSE across the
15709 main loop and epilogue (i.e. one load of the big constant in
15710 front of all the code). */
15711 if (CONST_INT_P (val_exp))
15712 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15713 desired_align, align);
15714 /* Ensure that alignment prologue won't copy past end of block. */
15715 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15716 {
15717 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15718 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15719 Make sure it is power of 2. */
15720 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15721
15722 /* To improve performance of small blocks, we jump around the VAL
15723 promoting code. This means that if the promoted VAL is not constant,
15724 we might not use it in the epilogue and have to use the byte
15725 loop variant. */
15726 if (epilogue_size_needed > 2 && !promoted_val)
15727 force_loopy_epilogue = true;
15728 label = gen_label_rtx ();
15729 emit_cmp_and_jump_insns (count_exp,
15730 GEN_INT (epilogue_size_needed),
15731 LTU, 0, counter_mode (count_exp), 1, label);
15732 if (GET_CODE (count_exp) == CONST_INT)
15733 ;
15734 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15735 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15736 else
15737 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15738 }
15739 if (dynamic_check != -1)
15740 {
15741 rtx hot_label = gen_label_rtx ();
15742 jump_around_label = gen_label_rtx ();
15743 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15744 LEU, 0, counter_mode (count_exp), 1, hot_label);
15745 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15746 set_storage_via_libcall (dst, count_exp, val_exp, false);
15747 emit_jump (jump_around_label);
15748 emit_label (hot_label);
15749 }
15750
15751 /* Step 2: Alignment prologue. */
15752
15753 /* Do the expensive promotion once we branched off the small blocks. */
15754 if (!promoted_val)
15755 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15756 desired_align, align);
15757 gcc_assert (desired_align >= 1 && align >= 1);
15758
15759 if (desired_align > align)
15760 {
15761 /* Except for the first move in the epilogue, we no longer know
15762 the constant offset in the aliasing info. It doesn't seem worth
15763 the pain to maintain it for the first move, so throw away
15764 the info early. */
15765 dst = change_address (dst, BLKmode, destreg);
15766 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15767 desired_align);
15768 }
15769 if (label && size_needed == 1)
15770 {
15771 emit_label (label);
15772 LABEL_NUSES (label) = 1;
15773 label = NULL;
15774 }
15775
15776 /* Step 3: Main loop. */
15777
15778 switch (alg)
15779 {
15780 case libcall:
15781 case no_stringop:
15782 gcc_unreachable ();
15783 case loop_1_byte:
15784 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15785 count_exp, QImode, 1, expected_size);
15786 break;
15787 case loop:
15788 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15789 count_exp, Pmode, 1, expected_size);
15790 break;
15791 case unrolled_loop:
15792 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15793 count_exp, Pmode, 4, expected_size);
15794 break;
15795 case rep_prefix_8_byte:
15796 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15797 DImode);
15798 break;
15799 case rep_prefix_4_byte:
15800 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15801 SImode);
15802 break;
15803 case rep_prefix_1_byte:
15804 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15805 QImode);
15806 break;
15807 }
15808 /* Properly adjust the offset of the dest memory for aliasing. */
15809 if (CONST_INT_P (count_exp))
15810 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15811 (count / size_needed) * size_needed);
15812 else
15813 dst = change_address (dst, BLKmode, destreg);
15814
15815 /* Step 4: Epilogue to copy the remaining bytes. */
15816
15817 if (label)
15818 {
15819 /* When the main loop is done, COUNT_EXP might hold the original count,
15820 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15821 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15822 bytes. Compensate if needed. */
15823
15824 if (size_needed < desired_align - align)
15825 {
15826 tmp =
15827 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15828 GEN_INT (size_needed - 1), count_exp, 1,
15829 OPTAB_DIRECT);
15830 size_needed = desired_align - align + 1;
15831 if (tmp != count_exp)
15832 emit_move_insn (count_exp, tmp);
15833 }
15834 emit_label (label);
15835 LABEL_NUSES (label) = 1;
15836 }
15837 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15838 {
15839 if (force_loopy_epilogue)
15840 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15841 size_needed);
15842 else
15843 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15844 size_needed);
15845 }
15846 if (jump_around_label)
15847 emit_label (jump_around_label);
15848 return 1;
15849 }
15850
15851 /* Expand the appropriate insns for doing strlen if not just doing
15852 repnz; scasb
15853
15854 out = result, initialized with the start address
15855 align_rtx = alignment of the address.
15856 scratch = scratch register, initialized with the start address when
15857 not aligned, otherwise undefined
15858
15859 This is just the body. It needs the initializations mentioned above and
15860 some address computing at the end. These things are done in i386.md. */
15861
15862 static void
15863 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15864 {
15865 int align;
15866 rtx tmp;
15867 rtx align_2_label = NULL_RTX;
15868 rtx align_3_label = NULL_RTX;
15869 rtx align_4_label = gen_label_rtx ();
15870 rtx end_0_label = gen_label_rtx ();
15871 rtx mem;
15872 rtx tmpreg = gen_reg_rtx (SImode);
15873 rtx scratch = gen_reg_rtx (SImode);
15874 rtx cmp;
15875
15876 align = 0;
15877 if (CONST_INT_P (align_rtx))
15878 align = INTVAL (align_rtx);
15879
15880 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15881
15882 /* Is there a known alignment and is it less than 4? */
15883 if (align < 4)
15884 {
15885 rtx scratch1 = gen_reg_rtx (Pmode);
15886 emit_move_insn (scratch1, out);
15887 /* Is there a known alignment and is it not 2? */
15888 if (align != 2)
15889 {
15890 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15891 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15892
15893 /* Leave just the 3 lower bits. */
15894 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15895 NULL_RTX, 0, OPTAB_WIDEN);
15896
15897 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15898 Pmode, 1, align_4_label);
15899 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15900 Pmode, 1, align_2_label);
15901 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15902 Pmode, 1, align_3_label);
15903 }
15904 else
15905 {
15906 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15907 check whether it is aligned to 4 bytes. */
15908
15909 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15910 NULL_RTX, 0, OPTAB_WIDEN);
15911
15912 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15913 Pmode, 1, align_4_label);
15914 }
15915
15916 mem = change_address (src, QImode, out);
15917
15918 /* Now compare the bytes. */
15919
15920 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15921 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15922 QImode, 1, end_0_label);
15923
15924 /* Increment the address. */
15925 if (TARGET_64BIT)
15926 emit_insn (gen_adddi3 (out, out, const1_rtx));
15927 else
15928 emit_insn (gen_addsi3 (out, out, const1_rtx));
15929
15930 /* Not needed with an alignment of 2 */
15931 if (align != 2)
15932 {
15933 emit_label (align_2_label);
15934
15935 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15936 end_0_label);
15937
15938 if (TARGET_64BIT)
15939 emit_insn (gen_adddi3 (out, out, const1_rtx));
15940 else
15941 emit_insn (gen_addsi3 (out, out, const1_rtx));
15942
15943 emit_label (align_3_label);
15944 }
15945
15946 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15947 end_0_label);
15948
15949 if (TARGET_64BIT)
15950 emit_insn (gen_adddi3 (out, out, const1_rtx));
15951 else
15952 emit_insn (gen_addsi3 (out, out, const1_rtx));
15953 }
15954
15955 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15956 align this loop: it only makes the program larger and does not make it
15957 any faster. */
15958 emit_label (align_4_label);
15959
15960 mem = change_address (src, SImode, out);
15961 emit_move_insn (scratch, mem);
15962 if (TARGET_64BIT)
15963 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15964 else
15965 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15966
15967 /* This formula yields a nonzero result iff one of the bytes is zero.
15968 This saves three branches inside the loop and many cycles. */
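/* Written out, TMPREG becomes (x - 0x01010101) & ~x & 0x80808080,
which is nonzero exactly when some byte of x is zero.  */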
15969
15970 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15971 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15972 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15973 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15974 gen_int_mode (0x80808080, SImode)));
15975 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15976 align_4_label);
15977
15978 if (TARGET_CMOVE)
15979 {
15980 rtx reg = gen_reg_rtx (SImode);
15981 rtx reg2 = gen_reg_rtx (Pmode);
15982 emit_move_insn (reg, tmpreg);
15983 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15984
15985 /* If zero is not in the first two bytes, move two bytes forward. */
15986 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15987 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15988 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15989 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15990 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15991 reg,
15992 tmpreg)));
15993 /* Emit lea manually to avoid clobbering of flags. */
15994 emit_insn (gen_rtx_SET (SImode, reg2,
15995 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15996
15997 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15998 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15999 emit_insn (gen_rtx_SET (VOIDmode, out,
16000 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16001 reg2,
16002 out)));
16003
16004 }
16005 else
16006 {
16007 rtx end_2_label = gen_label_rtx ();
16008 /* Is zero in the first two bytes? */
16009
16010 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16011 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16012 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16013 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16014 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16015 pc_rtx);
16016 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16017 JUMP_LABEL (tmp) = end_2_label;
16018
16019 /* Not in the first two. Move two bytes forward. */
16020 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16021 if (TARGET_64BIT)
16022 emit_insn (gen_adddi3 (out, out, const2_rtx));
16023 else
16024 emit_insn (gen_addsi3 (out, out, const2_rtx));
16025
16026 emit_label (end_2_label);
16027
16028 }
16029
16030 /* Avoid branch in fixing the byte. */
16031 tmpreg = gen_lowpart (QImode, tmpreg);
16032 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16033 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16034 if (TARGET_64BIT)
16035 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16036 else
16037 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16038
16039 emit_label (end_0_label);
16040 }
16041
16042 /* Expand strlen. */
16043
16044 int
16045 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16046 {
16047 rtx addr, scratch1, scratch2, scratch3, scratch4;
16048
16049 /* The generic case of the strlen expander is long. Avoid expanding it
16050 unless TARGET_INLINE_ALL_STRINGOPS. */
16051
16052 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16053 && !TARGET_INLINE_ALL_STRINGOPS
16054 && !optimize_size
16055 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16056 return 0;
16057
16058 addr = force_reg (Pmode, XEXP (src, 0));
16059 scratch1 = gen_reg_rtx (Pmode);
16060
16061 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16062 && !optimize_size)
16063 {
16064 /* Well it seems that some optimizer does not combine a call like
16065 foo(strlen(bar), strlen(bar));
16066 when the move and the subtraction are done here. It does calculate
16067 the length just once when these instructions are done inside of
16068 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16069 often used and I use one fewer register for the lifetime of
16070 output_strlen_unroll() this is better. */
16071
16072 emit_move_insn (out, addr);
16073
16074 ix86_expand_strlensi_unroll_1 (out, src, align);
16075
16076 /* strlensi_unroll_1 returns the address of the zero at the end of
16077 the string, like memchr(), so compute the length by subtracting
16078 the start address. */
16079 if (TARGET_64BIT)
16080 emit_insn (gen_subdi3 (out, out, addr));
16081 else
16082 emit_insn (gen_subsi3 (out, out, addr));
16083 }
16084 else
16085 {
16086 rtx unspec;
16087
16088 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16089 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16090 return false;
16091
16092 scratch2 = gen_reg_rtx (Pmode);
16093 scratch3 = gen_reg_rtx (Pmode);
16094 scratch4 = force_reg (Pmode, constm1_rtx);
16095
16096 emit_move_insn (scratch3, addr);
16097 eoschar = force_reg (QImode, eoschar);
16098
16099 src = replace_equiv_address_nv (src, scratch3);
16100
16101 /* If .md starts supporting :P, this can be done in .md. */
16102 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16103 scratch4), UNSPEC_SCAS);
16104 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16105 if (TARGET_64BIT)
16106 {
16107 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16108 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16109 }
16110 else
16111 {
16112 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16113 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16114 }
16115 }
16116 return 1;
16117 }
16118
16119 /* For a given symbol (function), construct code to compute the address of its
16120 PLT entry in the large x86-64 PIC model. */
16121 rtx
16122 construct_plt_address (rtx symbol)
16123 {
16124 rtx tmp = gen_reg_rtx (Pmode);
16125 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16126
16127 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16128 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16129
16130 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16131 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16132 return tmp;
16133 }
16134
16135 void
16136 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16137 rtx callarg2 ATTRIBUTE_UNUSED,
16138 rtx pop, int sibcall)
16139 {
16140 rtx use = NULL, call;
16141
16142 if (pop == const0_rtx)
16143 pop = NULL;
16144 gcc_assert (!TARGET_64BIT || !pop);
16145
16146 if (TARGET_MACHO && !TARGET_64BIT)
16147 {
16148 #if TARGET_MACHO
16149 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16150 fnaddr = machopic_indirect_call_target (fnaddr);
16151 #endif
16152 }
16153 else
16154 {
16155 /* Static functions and indirect calls don't need the pic register. */
16156 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16157 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16158 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16159 use_reg (&use, pic_offset_table_rtx);
16160 }
16161
16162 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16163 {
16164 rtx al = gen_rtx_REG (QImode, AX_REG);
16165 emit_move_insn (al, callarg2);
16166 use_reg (&use, al);
16167 }
16168
16169 if (ix86_cmodel == CM_LARGE_PIC
16170 && GET_CODE (fnaddr) == MEM
16171 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16172 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16173 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16174 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16175 {
16176 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16177 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16178 }
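/* A 64-bit sibcall through a non-constant address loads the target into
R11 below: in the x86-64 ABI R11 is call-clobbered and not used for
argument passing, so using it here cannot clobber an outgoing argument.  */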
16179 if (sibcall && TARGET_64BIT
16180 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16181 {
16182 rtx addr;
16183 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16184 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16185 emit_move_insn (fnaddr, addr);
16186 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16187 }
16188
16189 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16190 if (retval)
16191 call = gen_rtx_SET (VOIDmode, retval, call);
16192 if (pop)
16193 {
16194 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16195 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16196 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16197 }
16198
16199 call = emit_call_insn (call);
16200 if (use)
16201 CALL_INSN_FUNCTION_USAGE (call) = use;
16202 }
16203
16204 \f
16205 /* Clear stack slot assignments remembered from previous functions.
16206 This is called from INIT_EXPANDERS once before RTL is emitted for each
16207 function. */
16208
16209 static struct machine_function *
16210 ix86_init_machine_status (void)
16211 {
16212 struct machine_function *f;
16213
16214 f = GGC_CNEW (struct machine_function);
16215 f->use_fast_prologue_epilogue_nregs = -1;
16216 f->tls_descriptor_call_expanded_p = 0;
16217
16218 return f;
16219 }
16220
16221 /* Return a MEM corresponding to a stack slot with mode MODE.
16222 Allocate a new slot if necessary.
16223
16224 The RTL for a function can have several slots available: N is
16225 which slot to use. */
16226
16227 rtx
16228 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16229 {
16230 struct stack_local_entry *s;
16231
16232 gcc_assert (n < MAX_386_STACK_LOCALS);
16233
16234 /* Virtual slot is valid only before vregs are instantiated. */
16235 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16236
16237 for (s = ix86_stack_locals; s; s = s->next)
16238 if (s->mode == mode && s->n == n)
16239 return copy_rtx (s->rtl);
16240
16241 s = (struct stack_local_entry *)
16242 ggc_alloc (sizeof (struct stack_local_entry));
16243 s->n = n;
16244 s->mode = mode;
16245 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16246
16247 s->next = ix86_stack_locals;
16248 ix86_stack_locals = s;
16249 return s->rtl;
16250 }
16251
16252 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16253
16254 static GTY(()) rtx ix86_tls_symbol;
16255 rtx
16256 ix86_tls_get_addr (void)
16257 {
16258
16259 if (!ix86_tls_symbol)
16260 {
16261 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16262 (TARGET_ANY_GNU_TLS
16263 && !TARGET_64BIT)
16264 ? "___tls_get_addr"
16265 : "__tls_get_addr");
16266 }
16267
16268 return ix86_tls_symbol;
16269 }
16270
16271 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16272
16273 static GTY(()) rtx ix86_tls_module_base_symbol;
16274 rtx
16275 ix86_tls_module_base (void)
16276 {
16277
16278 if (!ix86_tls_module_base_symbol)
16279 {
16280 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16281 "_TLS_MODULE_BASE_");
16282 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16283 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16284 }
16285
16286 return ix86_tls_module_base_symbol;
16287 }
16288 \f
16289 /* Calculate the length of the memory address in the instruction
16290 encoding. Does not include the one-byte modrm, opcode, or prefix. */
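/* Examples, following the rules below: (%ecx) adds 0 bytes; (%esp) and
8(%ebp) add 1 (a SIB byte resp. an 8-bit displacement); 8(%ebx,%ecx,2)
adds 2; a 32-bit displacement always adds 4.  */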
16291
16292 int
16293 memory_address_length (rtx addr)
16294 {
16295 struct ix86_address parts;
16296 rtx base, index, disp;
16297 int len;
16298 int ok;
16299
16300 if (GET_CODE (addr) == PRE_DEC
16301 || GET_CODE (addr) == POST_INC
16302 || GET_CODE (addr) == PRE_MODIFY
16303 || GET_CODE (addr) == POST_MODIFY)
16304 return 0;
16305
16306 ok = ix86_decompose_address (addr, &parts);
16307 gcc_assert (ok);
16308
16309 if (parts.base && GET_CODE (parts.base) == SUBREG)
16310 parts.base = SUBREG_REG (parts.base);
16311 if (parts.index && GET_CODE (parts.index) == SUBREG)
16312 parts.index = SUBREG_REG (parts.index);
16313
16314 base = parts.base;
16315 index = parts.index;
16316 disp = parts.disp;
16317 len = 0;
16318
16319 /* Rule of thumb:
16320 - esp as the base always wants an index,
16321 - ebp as the base always wants a displacement. */
16322
16323 /* Register Indirect. */
16324 if (base && !index && !disp)
16325 {
16326 /* esp (for its index) and ebp (for its displacement) need
16327 the two-byte modrm form. */
16328 if (addr == stack_pointer_rtx
16329 || addr == arg_pointer_rtx
16330 || addr == frame_pointer_rtx
16331 || addr == hard_frame_pointer_rtx)
16332 len = 1;
16333 }
16334
16335 /* Direct Addressing. */
16336 else if (disp && !base && !index)
16337 len = 4;
16338
16339 else
16340 {
16341 /* Find the length of the displacement constant. */
16342 if (disp)
16343 {
16344 if (base && satisfies_constraint_K (disp))
16345 len = 1;
16346 else
16347 len = 4;
16348 }
16349 /* ebp always wants a displacement. */
16350 else if (base == hard_frame_pointer_rtx)
16351 len = 1;
16352
16353 /* An index requires the two-byte modrm form.... */
16354 if (index
16355 /* ...like esp, which always wants an index. */
16356 || base == stack_pointer_rtx
16357 || base == arg_pointer_rtx
16358 || base == frame_pointer_rtx)
16359 len += 1;
16360 }
16361
16362 return len;
16363 }
16364
16365 /* Compute the default value for the "length_immediate" attribute. When
16366 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
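/* E.g. "add $3, %eax" can use the sign-extended 8-bit immediate form
(length 1), whereas "add $300, %eax" needs a full 4-byte immediate.  */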
16367 int
16368 ix86_attr_length_immediate_default (rtx insn, int shortform)
16369 {
16370 int len = 0;
16371 int i;
16372 extract_insn_cached (insn);
16373 for (i = recog_data.n_operands - 1; i >= 0; --i)
16374 if (CONSTANT_P (recog_data.operand[i]))
16375 {
16376 gcc_assert (!len);
16377 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16378 len = 1;
16379 else
16380 {
16381 switch (get_attr_mode (insn))
16382 {
16383 case MODE_QI:
16384 len+=1;
16385 break;
16386 case MODE_HI:
16387 len+=2;
16388 break;
16389 case MODE_SI:
16390 len+=4;
16391 break;
16392 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
16393 case MODE_DI:
16394 len+=4;
16395 break;
16396 default:
16397 fatal_insn ("unknown insn mode", insn);
16398 }
16399 }
16400 }
16401 return len;
16402 }
16403 /* Compute the default value for the "length_address" attribute. */
16404 int
16405 ix86_attr_length_address_default (rtx insn)
16406 {
16407 int i;
16408
16409 if (get_attr_type (insn) == TYPE_LEA)
16410 {
16411 rtx set = PATTERN (insn);
16412
16413 if (GET_CODE (set) == PARALLEL)
16414 set = XVECEXP (set, 0, 0);
16415
16416 gcc_assert (GET_CODE (set) == SET);
16417
16418 return memory_address_length (SET_SRC (set));
16419 }
16420
16421 extract_insn_cached (insn);
16422 for (i = recog_data.n_operands - 1; i >= 0; --i)
16423 if (MEM_P (recog_data.operand[i]))
16424 {
16425 return memory_address_length (XEXP (recog_data.operand[i], 0));
16427 }
16428 return 0;
16429 }
16430 \f
16431 /* Return the maximum number of instructions a cpu can issue. */
16432
16433 static int
16434 ix86_issue_rate (void)
16435 {
16436 switch (ix86_tune)
16437 {
16438 case PROCESSOR_PENTIUM:
16439 case PROCESSOR_K6:
16440 return 2;
16441
16442 case PROCESSOR_PENTIUMPRO:
16443 case PROCESSOR_PENTIUM4:
16444 case PROCESSOR_ATHLON:
16445 case PROCESSOR_K8:
16446 case PROCESSOR_AMDFAM10:
16447 case PROCESSOR_NOCONA:
16448 case PROCESSOR_GENERIC32:
16449 case PROCESSOR_GENERIC64:
16450 return 3;
16451
16452 case PROCESSOR_CORE2:
16453 return 4;
16454
16455 default:
16456 return 1;
16457 }
16458 }
16459
16460 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16461    by DEP_INSN and reads nothing else that DEP_INSN sets.  */
16462
16463 static int
16464 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16465 {
16466 rtx set, set2;
16467
16468 /* Simplify the test for uninteresting insns. */
16469 if (insn_type != TYPE_SETCC
16470 && insn_type != TYPE_ICMOV
16471 && insn_type != TYPE_FCMOV
16472 && insn_type != TYPE_IBR)
16473 return 0;
16474
16475 if ((set = single_set (dep_insn)) != 0)
16476 {
16477 set = SET_DEST (set);
16478 set2 = NULL_RTX;
16479 }
16480 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16481 && XVECLEN (PATTERN (dep_insn), 0) == 2
16482 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16483 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16484 {
16485 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16486       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16487 }
16488 else
16489 return 0;
16490
16491 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16492 return 0;
16493
16494 /* This test is true if the dependent insn reads the flags but
16495 not any other potentially set register. */
16496 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16497 return 0;
16498
16499 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16500 return 0;
16501
16502 return 1;
16503 }
16504
16505 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16506 address with operands set by DEP_INSN. */
16507
16508 static int
16509 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16510 {
16511 rtx addr;
16512
16513 if (insn_type == TYPE_LEA
16514 && TARGET_PENTIUM)
16515 {
16516 addr = PATTERN (insn);
16517
16518 if (GET_CODE (addr) == PARALLEL)
16519 addr = XVECEXP (addr, 0, 0);
16520
16521 gcc_assert (GET_CODE (addr) == SET);
16522
16523 addr = SET_SRC (addr);
16524 }
16525 else
16526 {
16527 int i;
16528 extract_insn_cached (insn);
16529 for (i = recog_data.n_operands - 1; i >= 0; --i)
16530 if (MEM_P (recog_data.operand[i]))
16531 {
16532 addr = XEXP (recog_data.operand[i], 0);
16533 goto found;
16534 }
16535 return 0;
16536 found:;
16537 }
16538
16539 return modified_in_p (addr, dep_insn);
16540 }
16541
16542 static int
16543 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16544 {
16545 enum attr_type insn_type, dep_insn_type;
16546 enum attr_memory memory;
16547 rtx set, set2;
16548 int dep_insn_code_number;
16549
16550 /* Anti and output dependencies have zero cost on all CPUs. */
16551 if (REG_NOTE_KIND (link) != 0)
16552 return 0;
16553
16554 dep_insn_code_number = recog_memoized (dep_insn);
16555
16556 /* If we can't recognize the insns, we can't really do anything. */
16557 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16558 return cost;
16559
16560 insn_type = get_attr_type (insn);
16561 dep_insn_type = get_attr_type (dep_insn);
16562
16563 switch (ix86_tune)
16564 {
16565 case PROCESSOR_PENTIUM:
16566 /* Address Generation Interlock adds a cycle of latency. */
16567 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16568 cost += 1;
16569
16570 /* ??? Compares pair with jump/setcc. */
16571 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16572 cost = 0;
16573
16574 /* Floating point stores require value to be ready one cycle earlier. */
16575 if (insn_type == TYPE_FMOV
16576 && get_attr_memory (insn) == MEMORY_STORE
16577 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16578 cost += 1;
16579 break;
16580
16581 case PROCESSOR_PENTIUMPRO:
16582 memory = get_attr_memory (insn);
16583
16584 /* INT->FP conversion is expensive. */
16585 if (get_attr_fp_int_src (dep_insn))
16586 cost += 5;
16587
16588 /* There is one cycle extra latency between an FP op and a store. */
16589 if (insn_type == TYPE_FMOV
16590 && (set = single_set (dep_insn)) != NULL_RTX
16591 && (set2 = single_set (insn)) != NULL_RTX
16592 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16593 && MEM_P (SET_DEST (set2)))
16594 cost += 1;
16595
16596       /* Show the ability of the reorder buffer to hide the latency of a load
16597 	 by executing it in parallel with the previous instruction, when the
16598 	 previous instruction is not needed to compute the address.  */
16599 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16600 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16601 {
16602 	  /* Claim that moves take one cycle, as the core can issue one load
16603 	     at a time and the next load can start a cycle later.  */
16604 if (dep_insn_type == TYPE_IMOV
16605 || dep_insn_type == TYPE_FMOV)
16606 cost = 1;
16607 else if (cost > 1)
16608 cost--;
16609 }
16610 break;
16611
16612 case PROCESSOR_K6:
16613 memory = get_attr_memory (insn);
16614
16615 /* The esp dependency is resolved before the instruction is really
16616 finished. */
16617 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16618 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16619 return 1;
16620
16621 /* INT->FP conversion is expensive. */
16622 if (get_attr_fp_int_src (dep_insn))
16623 cost += 5;
16624
16625       /* Show the ability of the reorder buffer to hide the latency of a load
16626 	 by executing it in parallel with the previous instruction, when the
16627 	 previous instruction is not needed to compute the address.  */
16628 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16629 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16630 {
16631 	  /* Claim that moves take one cycle, as the core can issue one load
16632 	     at a time and the next load can start a cycle later.  */
16633 if (dep_insn_type == TYPE_IMOV
16634 || dep_insn_type == TYPE_FMOV)
16635 cost = 1;
16636 else if (cost > 2)
16637 cost -= 2;
16638 else
16639 cost = 1;
16640 }
16641 break;
16642
16643 case PROCESSOR_ATHLON:
16644 case PROCESSOR_K8:
16645 case PROCESSOR_AMDFAM10:
16646 case PROCESSOR_GENERIC32:
16647 case PROCESSOR_GENERIC64:
16648 memory = get_attr_memory (insn);
16649
16650       /* Show the ability of the reorder buffer to hide the latency of a load
16651 	 by executing it in parallel with the previous instruction, when the
16652 	 previous instruction is not needed to compute the address.  */
16653 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16654 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16655 {
16656 enum attr_unit unit = get_attr_unit (insn);
16657 int loadcost = 3;
16658
16659 /* Because of the difference between the length of integer and
16660 floating unit pipeline preparation stages, the memory operands
16661 for floating point are cheaper.
16662
16663 	     ??? For Athlon the difference is most probably 2.  */
16664 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16665 loadcost = 3;
16666 else
16667 loadcost = TARGET_ATHLON ? 2 : 0;
16668
16669 if (cost >= loadcost)
16670 cost -= loadcost;
16671 else
16672 cost = 0;
16673 }
16674
16675 default:
16676 break;
16677 }
16678
16679 return cost;
16680 }
16681
16682 /* How many alternative schedules to try. This should be as wide as the
16683 scheduling freedom in the DFA, but no wider. Making this value too
16684    large results in extra work for the scheduler.  */
16685
16686 static int
16687 ia32_multipass_dfa_lookahead (void)
16688 {
16689 switch (ix86_tune)
16690 {
16691 case PROCESSOR_PENTIUM:
16692 return 2;
16693
16694 case PROCESSOR_PENTIUMPRO:
16695 case PROCESSOR_K6:
16696 return 1;
16697
16698 default:
16699 return 0;
16700 }
16701 }
16702
16703 \f
16704 /* Compute the alignment given to a constant that is being placed in memory.
16705 EXP is the constant and ALIGN is the alignment that the object would
16706 ordinarily have.
16707 The value of this function is used instead of that alignment to align
16708 the object. */
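/* For example, a DFmode (double) REAL_CST is given 64-bit alignment below
   even though the ia32 ABI only requires 32-bit alignment for double, so
   that it can be accessed with aligned loads.  */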
16709
16710 int
16711 ix86_constant_alignment (tree exp, int align)
16712 {
16713 if (TREE_CODE (exp) == REAL_CST)
16714 {
16715 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16716 return 64;
16717 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16718 return 128;
16719 }
16720 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16721 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16722 return BITS_PER_WORD;
16723
16724 return align;
16725 }
16726
16727 /* Compute the alignment for a static variable.
16728 TYPE is the data type, and ALIGN is the alignment that
16729 the object would ordinarily have. The value of this function is used
16730 instead of that alignment to align the object. */
16731
16732 int
16733 ix86_data_alignment (tree type, int align)
16734 {
16735 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16736
16737 if (AGGREGATE_TYPE_P (type)
16738 && TYPE_SIZE (type)
16739 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16740 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16741 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16742 && align < max_align)
16743 align = max_align;
16744
16745 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16746      to a 16-byte boundary.  */
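  /* For example, "static char buf[17]" is 136 bits, so the TARGET_64BIT
     check below gives it 128-bit (16-byte) alignment even though char
     itself only needs byte alignment.  */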
16747 if (TARGET_64BIT)
16748 {
16749 if (AGGREGATE_TYPE_P (type)
16750 && TYPE_SIZE (type)
16751 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16752 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16753 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16754 return 128;
16755 }
16756
16757 if (TREE_CODE (type) == ARRAY_TYPE)
16758 {
16759 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16760 return 64;
16761 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16762 return 128;
16763 }
16764 else if (TREE_CODE (type) == COMPLEX_TYPE)
16765 {
16767 if (TYPE_MODE (type) == DCmode && align < 64)
16768 return 64;
16769 if (TYPE_MODE (type) == XCmode && align < 128)
16770 return 128;
16771 }
16772 else if ((TREE_CODE (type) == RECORD_TYPE
16773 || TREE_CODE (type) == UNION_TYPE
16774 || TREE_CODE (type) == QUAL_UNION_TYPE)
16775 && TYPE_FIELDS (type))
16776 {
16777 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16778 return 64;
16779 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16780 return 128;
16781 }
16782 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16783 || TREE_CODE (type) == INTEGER_TYPE)
16784 {
16785 if (TYPE_MODE (type) == DFmode && align < 64)
16786 return 64;
16787 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16788 return 128;
16789 }
16790
16791 return align;
16792 }
16793
16794 /* Compute the alignment for a local variable.
16795 TYPE is the data type, and ALIGN is the alignment that
16796 the object would ordinarily have. The value of this macro is used
16797 instead of that alignment to align the object. */
16798
16799 int
16800 ix86_local_alignment (tree type, int align)
16801 {
16802 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16803      to a 16-byte boundary.  */
16804 if (TARGET_64BIT)
16805 {
16806 if (AGGREGATE_TYPE_P (type)
16807 && TYPE_SIZE (type)
16808 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16809 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16810 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16811 return 128;
16812 }
16813 if (TREE_CODE (type) == ARRAY_TYPE)
16814 {
16815 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16816 return 64;
16817 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16818 return 128;
16819 }
16820 else if (TREE_CODE (type) == COMPLEX_TYPE)
16821 {
16822 if (TYPE_MODE (type) == DCmode && align < 64)
16823 return 64;
16824 if (TYPE_MODE (type) == XCmode && align < 128)
16825 return 128;
16826 }
16827 else if ((TREE_CODE (type) == RECORD_TYPE
16828 || TREE_CODE (type) == UNION_TYPE
16829 || TREE_CODE (type) == QUAL_UNION_TYPE)
16830 && TYPE_FIELDS (type))
16831 {
16832 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16833 return 64;
16834 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16835 return 128;
16836 }
16837 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16838 || TREE_CODE (type) == INTEGER_TYPE)
16839 {
16841 if (TYPE_MODE (type) == DFmode && align < 64)
16842 return 64;
16843 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16844 return 128;
16845 }
16846 return align;
16847 }
16848 \f
16849 /* Emit RTL insns to initialize the variable parts of a trampoline.
16850 FNADDR is an RTX for the address of the function's pure code.
16851 CXT is an RTX for the static chain value for the function. */
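/* Roughly, the byte sequences emitted below are, for the 32-bit trampoline:
	b9 <imm32>	movl   $CXT, %ecx
	e9 <rel32>	jmp    FNADDR		(relative to TRAMP + 10)
   and for the 64-bit trampoline:
	41 bb <imm32>	movl   $FNADDR, %r11d	(or 49 bb <imm64>, movabs)
	49 ba <imm64>	movabs $CXT, %r10
	49 ff e3	jmp    *%r11  */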
16852 void
16853 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16854 {
16855 if (!TARGET_64BIT)
16856 {
16857 /* Compute offset from the end of the jmp to the target function. */
16858 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16859 plus_constant (tramp, 10),
16860 NULL_RTX, 1, OPTAB_DIRECT);
16861 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16862 gen_int_mode (0xb9, QImode));
16863 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16864 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16865 gen_int_mode (0xe9, QImode));
16866 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16867 }
16868 else
16869 {
16870 int offset = 0;
16871       /* Try to load the address using the shorter movl instead of movabs.
16872 	 We may want to support movq for kernel mode, but the kernel does not
16873 	 use trampolines at the moment.  */
16874 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16875 {
16876 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16877 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16878 gen_int_mode (0xbb41, HImode));
16879 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16880 gen_lowpart (SImode, fnaddr));
16881 offset += 6;
16882 }
16883 else
16884 {
16885 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16886 gen_int_mode (0xbb49, HImode));
16887 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16888 fnaddr);
16889 offset += 10;
16890 }
16891 /* Load static chain using movabs to r10. */
16892 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16893 gen_int_mode (0xba49, HImode));
16894 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16895 cxt);
16896 offset += 10;
16897       /* Jump to r11.  */
16898 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16899 gen_int_mode (0xff49, HImode));
16900 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16901 gen_int_mode (0xe3, QImode));
16902 offset += 3;
16903 gcc_assert (offset <= TRAMPOLINE_SIZE);
16904 }
16905
16906 #ifdef ENABLE_EXECUTE_STACK
16907 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16908 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16909 #endif
16910 }
16911 \f
16912 /* Codes for all the SSE/MMX builtins. */
16913 enum ix86_builtins
16914 {
16915 IX86_BUILTIN_ADDPS,
16916 IX86_BUILTIN_ADDSS,
16917 IX86_BUILTIN_DIVPS,
16918 IX86_BUILTIN_DIVSS,
16919 IX86_BUILTIN_MULPS,
16920 IX86_BUILTIN_MULSS,
16921 IX86_BUILTIN_SUBPS,
16922 IX86_BUILTIN_SUBSS,
16923
16924 IX86_BUILTIN_CMPEQPS,
16925 IX86_BUILTIN_CMPLTPS,
16926 IX86_BUILTIN_CMPLEPS,
16927 IX86_BUILTIN_CMPGTPS,
16928 IX86_BUILTIN_CMPGEPS,
16929 IX86_BUILTIN_CMPNEQPS,
16930 IX86_BUILTIN_CMPNLTPS,
16931 IX86_BUILTIN_CMPNLEPS,
16932 IX86_BUILTIN_CMPNGTPS,
16933 IX86_BUILTIN_CMPNGEPS,
16934 IX86_BUILTIN_CMPORDPS,
16935 IX86_BUILTIN_CMPUNORDPS,
16936 IX86_BUILTIN_CMPEQSS,
16937 IX86_BUILTIN_CMPLTSS,
16938 IX86_BUILTIN_CMPLESS,
16939 IX86_BUILTIN_CMPNEQSS,
16940 IX86_BUILTIN_CMPNLTSS,
16941 IX86_BUILTIN_CMPNLESS,
16942 IX86_BUILTIN_CMPNGTSS,
16943 IX86_BUILTIN_CMPNGESS,
16944 IX86_BUILTIN_CMPORDSS,
16945 IX86_BUILTIN_CMPUNORDSS,
16946
16947 IX86_BUILTIN_COMIEQSS,
16948 IX86_BUILTIN_COMILTSS,
16949 IX86_BUILTIN_COMILESS,
16950 IX86_BUILTIN_COMIGTSS,
16951 IX86_BUILTIN_COMIGESS,
16952 IX86_BUILTIN_COMINEQSS,
16953 IX86_BUILTIN_UCOMIEQSS,
16954 IX86_BUILTIN_UCOMILTSS,
16955 IX86_BUILTIN_UCOMILESS,
16956 IX86_BUILTIN_UCOMIGTSS,
16957 IX86_BUILTIN_UCOMIGESS,
16958 IX86_BUILTIN_UCOMINEQSS,
16959
16960 IX86_BUILTIN_CVTPI2PS,
16961 IX86_BUILTIN_CVTPS2PI,
16962 IX86_BUILTIN_CVTSI2SS,
16963 IX86_BUILTIN_CVTSI642SS,
16964 IX86_BUILTIN_CVTSS2SI,
16965 IX86_BUILTIN_CVTSS2SI64,
16966 IX86_BUILTIN_CVTTPS2PI,
16967 IX86_BUILTIN_CVTTSS2SI,
16968 IX86_BUILTIN_CVTTSS2SI64,
16969
16970 IX86_BUILTIN_MAXPS,
16971 IX86_BUILTIN_MAXSS,
16972 IX86_BUILTIN_MINPS,
16973 IX86_BUILTIN_MINSS,
16974
16975 IX86_BUILTIN_LOADUPS,
16976 IX86_BUILTIN_STOREUPS,
16977 IX86_BUILTIN_MOVSS,
16978
16979 IX86_BUILTIN_MOVHLPS,
16980 IX86_BUILTIN_MOVLHPS,
16981 IX86_BUILTIN_LOADHPS,
16982 IX86_BUILTIN_LOADLPS,
16983 IX86_BUILTIN_STOREHPS,
16984 IX86_BUILTIN_STORELPS,
16985
16986 IX86_BUILTIN_MASKMOVQ,
16987 IX86_BUILTIN_MOVMSKPS,
16988 IX86_BUILTIN_PMOVMSKB,
16989
16990 IX86_BUILTIN_MOVNTPS,
16991 IX86_BUILTIN_MOVNTQ,
16992
16993 IX86_BUILTIN_LOADDQU,
16994 IX86_BUILTIN_STOREDQU,
16995
16996 IX86_BUILTIN_PACKSSWB,
16997 IX86_BUILTIN_PACKSSDW,
16998 IX86_BUILTIN_PACKUSWB,
16999
17000 IX86_BUILTIN_PADDB,
17001 IX86_BUILTIN_PADDW,
17002 IX86_BUILTIN_PADDD,
17003 IX86_BUILTIN_PADDQ,
17004 IX86_BUILTIN_PADDSB,
17005 IX86_BUILTIN_PADDSW,
17006 IX86_BUILTIN_PADDUSB,
17007 IX86_BUILTIN_PADDUSW,
17008 IX86_BUILTIN_PSUBB,
17009 IX86_BUILTIN_PSUBW,
17010 IX86_BUILTIN_PSUBD,
17011 IX86_BUILTIN_PSUBQ,
17012 IX86_BUILTIN_PSUBSB,
17013 IX86_BUILTIN_PSUBSW,
17014 IX86_BUILTIN_PSUBUSB,
17015 IX86_BUILTIN_PSUBUSW,
17016
17017 IX86_BUILTIN_PAND,
17018 IX86_BUILTIN_PANDN,
17019 IX86_BUILTIN_POR,
17020 IX86_BUILTIN_PXOR,
17021
17022 IX86_BUILTIN_PAVGB,
17023 IX86_BUILTIN_PAVGW,
17024
17025 IX86_BUILTIN_PCMPEQB,
17026 IX86_BUILTIN_PCMPEQW,
17027 IX86_BUILTIN_PCMPEQD,
17028 IX86_BUILTIN_PCMPGTB,
17029 IX86_BUILTIN_PCMPGTW,
17030 IX86_BUILTIN_PCMPGTD,
17031
17032 IX86_BUILTIN_PMADDWD,
17033
17034 IX86_BUILTIN_PMAXSW,
17035 IX86_BUILTIN_PMAXUB,
17036 IX86_BUILTIN_PMINSW,
17037 IX86_BUILTIN_PMINUB,
17038
17039 IX86_BUILTIN_PMULHUW,
17040 IX86_BUILTIN_PMULHW,
17041 IX86_BUILTIN_PMULLW,
17042
17043 IX86_BUILTIN_PSADBW,
17044 IX86_BUILTIN_PSHUFW,
17045
17046 IX86_BUILTIN_PSLLW,
17047 IX86_BUILTIN_PSLLD,
17048 IX86_BUILTIN_PSLLQ,
17049 IX86_BUILTIN_PSRAW,
17050 IX86_BUILTIN_PSRAD,
17051 IX86_BUILTIN_PSRLW,
17052 IX86_BUILTIN_PSRLD,
17053 IX86_BUILTIN_PSRLQ,
17054 IX86_BUILTIN_PSLLWI,
17055 IX86_BUILTIN_PSLLDI,
17056 IX86_BUILTIN_PSLLQI,
17057 IX86_BUILTIN_PSRAWI,
17058 IX86_BUILTIN_PSRADI,
17059 IX86_BUILTIN_PSRLWI,
17060 IX86_BUILTIN_PSRLDI,
17061 IX86_BUILTIN_PSRLQI,
17062
17063 IX86_BUILTIN_PUNPCKHBW,
17064 IX86_BUILTIN_PUNPCKHWD,
17065 IX86_BUILTIN_PUNPCKHDQ,
17066 IX86_BUILTIN_PUNPCKLBW,
17067 IX86_BUILTIN_PUNPCKLWD,
17068 IX86_BUILTIN_PUNPCKLDQ,
17069
17070 IX86_BUILTIN_SHUFPS,
17071
17072 IX86_BUILTIN_RCPPS,
17073 IX86_BUILTIN_RCPSS,
17074 IX86_BUILTIN_RSQRTPS,
17075 IX86_BUILTIN_RSQRTSS,
17076 IX86_BUILTIN_RSQRTF,
17077 IX86_BUILTIN_SQRTPS,
17078 IX86_BUILTIN_SQRTSS,
17079
17080 IX86_BUILTIN_UNPCKHPS,
17081 IX86_BUILTIN_UNPCKLPS,
17082
17083 IX86_BUILTIN_ANDPS,
17084 IX86_BUILTIN_ANDNPS,
17085 IX86_BUILTIN_ORPS,
17086 IX86_BUILTIN_XORPS,
17087
17088 IX86_BUILTIN_EMMS,
17089 IX86_BUILTIN_LDMXCSR,
17090 IX86_BUILTIN_STMXCSR,
17091 IX86_BUILTIN_SFENCE,
17092
17093 /* 3DNow! Original */
17094 IX86_BUILTIN_FEMMS,
17095 IX86_BUILTIN_PAVGUSB,
17096 IX86_BUILTIN_PF2ID,
17097 IX86_BUILTIN_PFACC,
17098 IX86_BUILTIN_PFADD,
17099 IX86_BUILTIN_PFCMPEQ,
17100 IX86_BUILTIN_PFCMPGE,
17101 IX86_BUILTIN_PFCMPGT,
17102 IX86_BUILTIN_PFMAX,
17103 IX86_BUILTIN_PFMIN,
17104 IX86_BUILTIN_PFMUL,
17105 IX86_BUILTIN_PFRCP,
17106 IX86_BUILTIN_PFRCPIT1,
17107 IX86_BUILTIN_PFRCPIT2,
17108 IX86_BUILTIN_PFRSQIT1,
17109 IX86_BUILTIN_PFRSQRT,
17110 IX86_BUILTIN_PFSUB,
17111 IX86_BUILTIN_PFSUBR,
17112 IX86_BUILTIN_PI2FD,
17113 IX86_BUILTIN_PMULHRW,
17114
17115 /* 3DNow! Athlon Extensions */
17116 IX86_BUILTIN_PF2IW,
17117 IX86_BUILTIN_PFNACC,
17118 IX86_BUILTIN_PFPNACC,
17119 IX86_BUILTIN_PI2FW,
17120 IX86_BUILTIN_PSWAPDSI,
17121 IX86_BUILTIN_PSWAPDSF,
17122
17123 /* SSE2 */
17124 IX86_BUILTIN_ADDPD,
17125 IX86_BUILTIN_ADDSD,
17126 IX86_BUILTIN_DIVPD,
17127 IX86_BUILTIN_DIVSD,
17128 IX86_BUILTIN_MULPD,
17129 IX86_BUILTIN_MULSD,
17130 IX86_BUILTIN_SUBPD,
17131 IX86_BUILTIN_SUBSD,
17132
17133 IX86_BUILTIN_CMPEQPD,
17134 IX86_BUILTIN_CMPLTPD,
17135 IX86_BUILTIN_CMPLEPD,
17136 IX86_BUILTIN_CMPGTPD,
17137 IX86_BUILTIN_CMPGEPD,
17138 IX86_BUILTIN_CMPNEQPD,
17139 IX86_BUILTIN_CMPNLTPD,
17140 IX86_BUILTIN_CMPNLEPD,
17141 IX86_BUILTIN_CMPNGTPD,
17142 IX86_BUILTIN_CMPNGEPD,
17143 IX86_BUILTIN_CMPORDPD,
17144 IX86_BUILTIN_CMPUNORDPD,
17145 IX86_BUILTIN_CMPEQSD,
17146 IX86_BUILTIN_CMPLTSD,
17147 IX86_BUILTIN_CMPLESD,
17148 IX86_BUILTIN_CMPNEQSD,
17149 IX86_BUILTIN_CMPNLTSD,
17150 IX86_BUILTIN_CMPNLESD,
17151 IX86_BUILTIN_CMPORDSD,
17152 IX86_BUILTIN_CMPUNORDSD,
17153
17154 IX86_BUILTIN_COMIEQSD,
17155 IX86_BUILTIN_COMILTSD,
17156 IX86_BUILTIN_COMILESD,
17157 IX86_BUILTIN_COMIGTSD,
17158 IX86_BUILTIN_COMIGESD,
17159 IX86_BUILTIN_COMINEQSD,
17160 IX86_BUILTIN_UCOMIEQSD,
17161 IX86_BUILTIN_UCOMILTSD,
17162 IX86_BUILTIN_UCOMILESD,
17163 IX86_BUILTIN_UCOMIGTSD,
17164 IX86_BUILTIN_UCOMIGESD,
17165 IX86_BUILTIN_UCOMINEQSD,
17166
17167 IX86_BUILTIN_MAXPD,
17168 IX86_BUILTIN_MAXSD,
17169 IX86_BUILTIN_MINPD,
17170 IX86_BUILTIN_MINSD,
17171
17172 IX86_BUILTIN_ANDPD,
17173 IX86_BUILTIN_ANDNPD,
17174 IX86_BUILTIN_ORPD,
17175 IX86_BUILTIN_XORPD,
17176
17177 IX86_BUILTIN_SQRTPD,
17178 IX86_BUILTIN_SQRTSD,
17179
17180 IX86_BUILTIN_UNPCKHPD,
17181 IX86_BUILTIN_UNPCKLPD,
17182
17183 IX86_BUILTIN_SHUFPD,
17184
17185 IX86_BUILTIN_LOADUPD,
17186 IX86_BUILTIN_STOREUPD,
17187 IX86_BUILTIN_MOVSD,
17188
17189 IX86_BUILTIN_LOADHPD,
17190 IX86_BUILTIN_LOADLPD,
17191
17192 IX86_BUILTIN_CVTDQ2PD,
17193 IX86_BUILTIN_CVTDQ2PS,
17194
17195 IX86_BUILTIN_CVTPD2DQ,
17196 IX86_BUILTIN_CVTPD2PI,
17197 IX86_BUILTIN_CVTPD2PS,
17198 IX86_BUILTIN_CVTTPD2DQ,
17199 IX86_BUILTIN_CVTTPD2PI,
17200
17201 IX86_BUILTIN_CVTPI2PD,
17202 IX86_BUILTIN_CVTSI2SD,
17203 IX86_BUILTIN_CVTSI642SD,
17204
17205 IX86_BUILTIN_CVTSD2SI,
17206 IX86_BUILTIN_CVTSD2SI64,
17207 IX86_BUILTIN_CVTSD2SS,
17208 IX86_BUILTIN_CVTSS2SD,
17209 IX86_BUILTIN_CVTTSD2SI,
17210 IX86_BUILTIN_CVTTSD2SI64,
17211
17212 IX86_BUILTIN_CVTPS2DQ,
17213 IX86_BUILTIN_CVTPS2PD,
17214 IX86_BUILTIN_CVTTPS2DQ,
17215
17216 IX86_BUILTIN_MOVNTI,
17217 IX86_BUILTIN_MOVNTPD,
17218 IX86_BUILTIN_MOVNTDQ,
17219
17220 /* SSE2 MMX */
17221 IX86_BUILTIN_MASKMOVDQU,
17222 IX86_BUILTIN_MOVMSKPD,
17223 IX86_BUILTIN_PMOVMSKB128,
17224
17225 IX86_BUILTIN_PACKSSWB128,
17226 IX86_BUILTIN_PACKSSDW128,
17227 IX86_BUILTIN_PACKUSWB128,
17228
17229 IX86_BUILTIN_PADDB128,
17230 IX86_BUILTIN_PADDW128,
17231 IX86_BUILTIN_PADDD128,
17232 IX86_BUILTIN_PADDQ128,
17233 IX86_BUILTIN_PADDSB128,
17234 IX86_BUILTIN_PADDSW128,
17235 IX86_BUILTIN_PADDUSB128,
17236 IX86_BUILTIN_PADDUSW128,
17237 IX86_BUILTIN_PSUBB128,
17238 IX86_BUILTIN_PSUBW128,
17239 IX86_BUILTIN_PSUBD128,
17240 IX86_BUILTIN_PSUBQ128,
17241 IX86_BUILTIN_PSUBSB128,
17242 IX86_BUILTIN_PSUBSW128,
17243 IX86_BUILTIN_PSUBUSB128,
17244 IX86_BUILTIN_PSUBUSW128,
17245
17246 IX86_BUILTIN_PAND128,
17247 IX86_BUILTIN_PANDN128,
17248 IX86_BUILTIN_POR128,
17249 IX86_BUILTIN_PXOR128,
17250
17251 IX86_BUILTIN_PAVGB128,
17252 IX86_BUILTIN_PAVGW128,
17253
17254 IX86_BUILTIN_PCMPEQB128,
17255 IX86_BUILTIN_PCMPEQW128,
17256 IX86_BUILTIN_PCMPEQD128,
17257 IX86_BUILTIN_PCMPGTB128,
17258 IX86_BUILTIN_PCMPGTW128,
17259 IX86_BUILTIN_PCMPGTD128,
17260
17261 IX86_BUILTIN_PMADDWD128,
17262
17263 IX86_BUILTIN_PMAXSW128,
17264 IX86_BUILTIN_PMAXUB128,
17265 IX86_BUILTIN_PMINSW128,
17266 IX86_BUILTIN_PMINUB128,
17267
17268 IX86_BUILTIN_PMULUDQ,
17269 IX86_BUILTIN_PMULUDQ128,
17270 IX86_BUILTIN_PMULHUW128,
17271 IX86_BUILTIN_PMULHW128,
17272 IX86_BUILTIN_PMULLW128,
17273
17274 IX86_BUILTIN_PSADBW128,
17275 IX86_BUILTIN_PSHUFHW,
17276 IX86_BUILTIN_PSHUFLW,
17277 IX86_BUILTIN_PSHUFD,
17278
17279 IX86_BUILTIN_PSLLDQI128,
17280 IX86_BUILTIN_PSLLWI128,
17281 IX86_BUILTIN_PSLLDI128,
17282 IX86_BUILTIN_PSLLQI128,
17283 IX86_BUILTIN_PSRAWI128,
17284 IX86_BUILTIN_PSRADI128,
17285 IX86_BUILTIN_PSRLDQI128,
17286 IX86_BUILTIN_PSRLWI128,
17287 IX86_BUILTIN_PSRLDI128,
17288 IX86_BUILTIN_PSRLQI128,
17289
17290 IX86_BUILTIN_PSLLDQ128,
17291 IX86_BUILTIN_PSLLW128,
17292 IX86_BUILTIN_PSLLD128,
17293 IX86_BUILTIN_PSLLQ128,
17294 IX86_BUILTIN_PSRAW128,
17295 IX86_BUILTIN_PSRAD128,
17296 IX86_BUILTIN_PSRLW128,
17297 IX86_BUILTIN_PSRLD128,
17298 IX86_BUILTIN_PSRLQ128,
17299
17300 IX86_BUILTIN_PUNPCKHBW128,
17301 IX86_BUILTIN_PUNPCKHWD128,
17302 IX86_BUILTIN_PUNPCKHDQ128,
17303 IX86_BUILTIN_PUNPCKHQDQ128,
17304 IX86_BUILTIN_PUNPCKLBW128,
17305 IX86_BUILTIN_PUNPCKLWD128,
17306 IX86_BUILTIN_PUNPCKLDQ128,
17307 IX86_BUILTIN_PUNPCKLQDQ128,
17308
17309 IX86_BUILTIN_CLFLUSH,
17310 IX86_BUILTIN_MFENCE,
17311 IX86_BUILTIN_LFENCE,
17312
17313 /* Prescott New Instructions. */
17314 IX86_BUILTIN_ADDSUBPS,
17315 IX86_BUILTIN_HADDPS,
17316 IX86_BUILTIN_HSUBPS,
17317 IX86_BUILTIN_MOVSHDUP,
17318 IX86_BUILTIN_MOVSLDUP,
17319 IX86_BUILTIN_ADDSUBPD,
17320 IX86_BUILTIN_HADDPD,
17321 IX86_BUILTIN_HSUBPD,
17322 IX86_BUILTIN_LDDQU,
17323
17324 IX86_BUILTIN_MONITOR,
17325 IX86_BUILTIN_MWAIT,
17326
17327 /* SSSE3. */
17328 IX86_BUILTIN_PHADDW,
17329 IX86_BUILTIN_PHADDD,
17330 IX86_BUILTIN_PHADDSW,
17331 IX86_BUILTIN_PHSUBW,
17332 IX86_BUILTIN_PHSUBD,
17333 IX86_BUILTIN_PHSUBSW,
17334 IX86_BUILTIN_PMADDUBSW,
17335 IX86_BUILTIN_PMULHRSW,
17336 IX86_BUILTIN_PSHUFB,
17337 IX86_BUILTIN_PSIGNB,
17338 IX86_BUILTIN_PSIGNW,
17339 IX86_BUILTIN_PSIGND,
17340 IX86_BUILTIN_PALIGNR,
17341 IX86_BUILTIN_PABSB,
17342 IX86_BUILTIN_PABSW,
17343 IX86_BUILTIN_PABSD,
17344
17345 IX86_BUILTIN_PHADDW128,
17346 IX86_BUILTIN_PHADDD128,
17347 IX86_BUILTIN_PHADDSW128,
17348 IX86_BUILTIN_PHSUBW128,
17349 IX86_BUILTIN_PHSUBD128,
17350 IX86_BUILTIN_PHSUBSW128,
17351 IX86_BUILTIN_PMADDUBSW128,
17352 IX86_BUILTIN_PMULHRSW128,
17353 IX86_BUILTIN_PSHUFB128,
17354 IX86_BUILTIN_PSIGNB128,
17355 IX86_BUILTIN_PSIGNW128,
17356 IX86_BUILTIN_PSIGND128,
17357 IX86_BUILTIN_PALIGNR128,
17358 IX86_BUILTIN_PABSB128,
17359 IX86_BUILTIN_PABSW128,
17360 IX86_BUILTIN_PABSD128,
17361
17362 /* AMDFAM10 - SSE4A New Instructions. */
17363 IX86_BUILTIN_MOVNTSD,
17364 IX86_BUILTIN_MOVNTSS,
17365 IX86_BUILTIN_EXTRQI,
17366 IX86_BUILTIN_EXTRQ,
17367 IX86_BUILTIN_INSERTQI,
17368 IX86_BUILTIN_INSERTQ,
17369
17370 /* SSE4.1. */
17371 IX86_BUILTIN_BLENDPD,
17372 IX86_BUILTIN_BLENDPS,
17373 IX86_BUILTIN_BLENDVPD,
17374 IX86_BUILTIN_BLENDVPS,
17375 IX86_BUILTIN_PBLENDVB128,
17376 IX86_BUILTIN_PBLENDW128,
17377
17378 IX86_BUILTIN_DPPD,
17379 IX86_BUILTIN_DPPS,
17380
17381 IX86_BUILTIN_INSERTPS128,
17382
17383 IX86_BUILTIN_MOVNTDQA,
17384 IX86_BUILTIN_MPSADBW128,
17385 IX86_BUILTIN_PACKUSDW128,
17386 IX86_BUILTIN_PCMPEQQ,
17387 IX86_BUILTIN_PHMINPOSUW128,
17388
17389 IX86_BUILTIN_PMAXSB128,
17390 IX86_BUILTIN_PMAXSD128,
17391 IX86_BUILTIN_PMAXUD128,
17392 IX86_BUILTIN_PMAXUW128,
17393
17394 IX86_BUILTIN_PMINSB128,
17395 IX86_BUILTIN_PMINSD128,
17396 IX86_BUILTIN_PMINUD128,
17397 IX86_BUILTIN_PMINUW128,
17398
17399 IX86_BUILTIN_PMOVSXBW128,
17400 IX86_BUILTIN_PMOVSXBD128,
17401 IX86_BUILTIN_PMOVSXBQ128,
17402 IX86_BUILTIN_PMOVSXWD128,
17403 IX86_BUILTIN_PMOVSXWQ128,
17404 IX86_BUILTIN_PMOVSXDQ128,
17405
17406 IX86_BUILTIN_PMOVZXBW128,
17407 IX86_BUILTIN_PMOVZXBD128,
17408 IX86_BUILTIN_PMOVZXBQ128,
17409 IX86_BUILTIN_PMOVZXWD128,
17410 IX86_BUILTIN_PMOVZXWQ128,
17411 IX86_BUILTIN_PMOVZXDQ128,
17412
17413 IX86_BUILTIN_PMULDQ128,
17414 IX86_BUILTIN_PMULLD128,
17415
17416 IX86_BUILTIN_ROUNDPD,
17417 IX86_BUILTIN_ROUNDPS,
17418 IX86_BUILTIN_ROUNDSD,
17419 IX86_BUILTIN_ROUNDSS,
17420
17421 IX86_BUILTIN_PTESTZ,
17422 IX86_BUILTIN_PTESTC,
17423 IX86_BUILTIN_PTESTNZC,
17424
17425 IX86_BUILTIN_VEC_INIT_V2SI,
17426 IX86_BUILTIN_VEC_INIT_V4HI,
17427 IX86_BUILTIN_VEC_INIT_V8QI,
17428 IX86_BUILTIN_VEC_EXT_V2DF,
17429 IX86_BUILTIN_VEC_EXT_V2DI,
17430 IX86_BUILTIN_VEC_EXT_V4SF,
17431 IX86_BUILTIN_VEC_EXT_V4SI,
17432 IX86_BUILTIN_VEC_EXT_V8HI,
17433 IX86_BUILTIN_VEC_EXT_V2SI,
17434 IX86_BUILTIN_VEC_EXT_V4HI,
17435 IX86_BUILTIN_VEC_EXT_V16QI,
17436 IX86_BUILTIN_VEC_SET_V2DI,
17437 IX86_BUILTIN_VEC_SET_V4SF,
17438 IX86_BUILTIN_VEC_SET_V4SI,
17439 IX86_BUILTIN_VEC_SET_V8HI,
17440 IX86_BUILTIN_VEC_SET_V4HI,
17441 IX86_BUILTIN_VEC_SET_V16QI,
17442
17443 IX86_BUILTIN_VEC_PACK_SFIX,
17444
17445 /* SSE4.2. */
17446 IX86_BUILTIN_CRC32QI,
17447 IX86_BUILTIN_CRC32HI,
17448 IX86_BUILTIN_CRC32SI,
17449 IX86_BUILTIN_CRC32DI,
17450
17451 IX86_BUILTIN_PCMPESTRI128,
17452 IX86_BUILTIN_PCMPESTRM128,
17453 IX86_BUILTIN_PCMPESTRA128,
17454 IX86_BUILTIN_PCMPESTRC128,
17455 IX86_BUILTIN_PCMPESTRO128,
17456 IX86_BUILTIN_PCMPESTRS128,
17457 IX86_BUILTIN_PCMPESTRZ128,
17458 IX86_BUILTIN_PCMPISTRI128,
17459 IX86_BUILTIN_PCMPISTRM128,
17460 IX86_BUILTIN_PCMPISTRA128,
17461 IX86_BUILTIN_PCMPISTRC128,
17462 IX86_BUILTIN_PCMPISTRO128,
17463 IX86_BUILTIN_PCMPISTRS128,
17464 IX86_BUILTIN_PCMPISTRZ128,
17465
17466 IX86_BUILTIN_PCMPGTQ,
17467
17468 /* TFmode support builtins. */
17469 IX86_BUILTIN_INFQ,
17470 IX86_BUILTIN_FABSQ,
17471 IX86_BUILTIN_COPYSIGNQ,
17472
17473 /* SSE5 instructions */
17474 IX86_BUILTIN_FMADDSS,
17475 IX86_BUILTIN_FMADDSD,
17476 IX86_BUILTIN_FMADDPS,
17477 IX86_BUILTIN_FMADDPD,
17478 IX86_BUILTIN_FMSUBSS,
17479 IX86_BUILTIN_FMSUBSD,
17480 IX86_BUILTIN_FMSUBPS,
17481 IX86_BUILTIN_FMSUBPD,
17482 IX86_BUILTIN_FNMADDSS,
17483 IX86_BUILTIN_FNMADDSD,
17484 IX86_BUILTIN_FNMADDPS,
17485 IX86_BUILTIN_FNMADDPD,
17486 IX86_BUILTIN_FNMSUBSS,
17487 IX86_BUILTIN_FNMSUBSD,
17488 IX86_BUILTIN_FNMSUBPS,
17489 IX86_BUILTIN_FNMSUBPD,
17490 IX86_BUILTIN_PCMOV_V2DI,
17491 IX86_BUILTIN_PCMOV_V4SI,
17492 IX86_BUILTIN_PCMOV_V8HI,
17493 IX86_BUILTIN_PCMOV_V16QI,
17494 IX86_BUILTIN_PCMOV_V4SF,
17495 IX86_BUILTIN_PCMOV_V2DF,
17496 IX86_BUILTIN_PPERM,
17497 IX86_BUILTIN_PERMPS,
17498 IX86_BUILTIN_PERMPD,
17499 IX86_BUILTIN_PMACSSWW,
17500 IX86_BUILTIN_PMACSWW,
17501 IX86_BUILTIN_PMACSSWD,
17502 IX86_BUILTIN_PMACSWD,
17503 IX86_BUILTIN_PMACSSDD,
17504 IX86_BUILTIN_PMACSDD,
17505 IX86_BUILTIN_PMACSSDQL,
17506 IX86_BUILTIN_PMACSSDQH,
17507 IX86_BUILTIN_PMACSDQL,
17508 IX86_BUILTIN_PMACSDQH,
17509 IX86_BUILTIN_PMADCSSWD,
17510 IX86_BUILTIN_PMADCSWD,
17511 IX86_BUILTIN_PHADDBW,
17512 IX86_BUILTIN_PHADDBD,
17513 IX86_BUILTIN_PHADDBQ,
17514 IX86_BUILTIN_PHADDWD,
17515 IX86_BUILTIN_PHADDWQ,
17516 IX86_BUILTIN_PHADDDQ,
17517 IX86_BUILTIN_PHADDUBW,
17518 IX86_BUILTIN_PHADDUBD,
17519 IX86_BUILTIN_PHADDUBQ,
17520 IX86_BUILTIN_PHADDUWD,
17521 IX86_BUILTIN_PHADDUWQ,
17522 IX86_BUILTIN_PHADDUDQ,
17523 IX86_BUILTIN_PHSUBBW,
17524 IX86_BUILTIN_PHSUBWD,
17525 IX86_BUILTIN_PHSUBDQ,
17526 IX86_BUILTIN_PROTB,
17527 IX86_BUILTIN_PROTW,
17528 IX86_BUILTIN_PROTD,
17529 IX86_BUILTIN_PROTQ,
17530 IX86_BUILTIN_PROTB_IMM,
17531 IX86_BUILTIN_PROTW_IMM,
17532 IX86_BUILTIN_PROTD_IMM,
17533 IX86_BUILTIN_PROTQ_IMM,
17534 IX86_BUILTIN_PSHLB,
17535 IX86_BUILTIN_PSHLW,
17536 IX86_BUILTIN_PSHLD,
17537 IX86_BUILTIN_PSHLQ,
17538 IX86_BUILTIN_PSHAB,
17539 IX86_BUILTIN_PSHAW,
17540 IX86_BUILTIN_PSHAD,
17541 IX86_BUILTIN_PSHAQ,
17542 IX86_BUILTIN_FRCZSS,
17543 IX86_BUILTIN_FRCZSD,
17544 IX86_BUILTIN_FRCZPS,
17545 IX86_BUILTIN_FRCZPD,
17546 IX86_BUILTIN_CVTPH2PS,
17547 IX86_BUILTIN_CVTPS2PH,
17548
17549 IX86_BUILTIN_COMEQSS,
17550 IX86_BUILTIN_COMNESS,
17551 IX86_BUILTIN_COMLTSS,
17552 IX86_BUILTIN_COMLESS,
17553 IX86_BUILTIN_COMGTSS,
17554 IX86_BUILTIN_COMGESS,
17555 IX86_BUILTIN_COMUEQSS,
17556 IX86_BUILTIN_COMUNESS,
17557 IX86_BUILTIN_COMULTSS,
17558 IX86_BUILTIN_COMULESS,
17559 IX86_BUILTIN_COMUGTSS,
17560 IX86_BUILTIN_COMUGESS,
17561 IX86_BUILTIN_COMORDSS,
17562 IX86_BUILTIN_COMUNORDSS,
17563 IX86_BUILTIN_COMFALSESS,
17564 IX86_BUILTIN_COMTRUESS,
17565
17566 IX86_BUILTIN_COMEQSD,
17567 IX86_BUILTIN_COMNESD,
17568 IX86_BUILTIN_COMLTSD,
17569 IX86_BUILTIN_COMLESD,
17570 IX86_BUILTIN_COMGTSD,
17571 IX86_BUILTIN_COMGESD,
17572 IX86_BUILTIN_COMUEQSD,
17573 IX86_BUILTIN_COMUNESD,
17574 IX86_BUILTIN_COMULTSD,
17575 IX86_BUILTIN_COMULESD,
17576 IX86_BUILTIN_COMUGTSD,
17577 IX86_BUILTIN_COMUGESD,
17578 IX86_BUILTIN_COMORDSD,
17579 IX86_BUILTIN_COMUNORDSD,
17580 IX86_BUILTIN_COMFALSESD,
17581 IX86_BUILTIN_COMTRUESD,
17582
17583 IX86_BUILTIN_COMEQPS,
17584 IX86_BUILTIN_COMNEPS,
17585 IX86_BUILTIN_COMLTPS,
17586 IX86_BUILTIN_COMLEPS,
17587 IX86_BUILTIN_COMGTPS,
17588 IX86_BUILTIN_COMGEPS,
17589 IX86_BUILTIN_COMUEQPS,
17590 IX86_BUILTIN_COMUNEPS,
17591 IX86_BUILTIN_COMULTPS,
17592 IX86_BUILTIN_COMULEPS,
17593 IX86_BUILTIN_COMUGTPS,
17594 IX86_BUILTIN_COMUGEPS,
17595 IX86_BUILTIN_COMORDPS,
17596 IX86_BUILTIN_COMUNORDPS,
17597 IX86_BUILTIN_COMFALSEPS,
17598 IX86_BUILTIN_COMTRUEPS,
17599
17600 IX86_BUILTIN_COMEQPD,
17601 IX86_BUILTIN_COMNEPD,
17602 IX86_BUILTIN_COMLTPD,
17603 IX86_BUILTIN_COMLEPD,
17604 IX86_BUILTIN_COMGTPD,
17605 IX86_BUILTIN_COMGEPD,
17606 IX86_BUILTIN_COMUEQPD,
17607 IX86_BUILTIN_COMUNEPD,
17608 IX86_BUILTIN_COMULTPD,
17609 IX86_BUILTIN_COMULEPD,
17610 IX86_BUILTIN_COMUGTPD,
17611 IX86_BUILTIN_COMUGEPD,
17612 IX86_BUILTIN_COMORDPD,
17613 IX86_BUILTIN_COMUNORDPD,
17614 IX86_BUILTIN_COMFALSEPD,
17615 IX86_BUILTIN_COMTRUEPD,
17616
17617 IX86_BUILTIN_PCOMEQUB,
17618 IX86_BUILTIN_PCOMNEUB,
17619 IX86_BUILTIN_PCOMLTUB,
17620 IX86_BUILTIN_PCOMLEUB,
17621 IX86_BUILTIN_PCOMGTUB,
17622 IX86_BUILTIN_PCOMGEUB,
17623 IX86_BUILTIN_PCOMFALSEUB,
17624 IX86_BUILTIN_PCOMTRUEUB,
17625 IX86_BUILTIN_PCOMEQUW,
17626 IX86_BUILTIN_PCOMNEUW,
17627 IX86_BUILTIN_PCOMLTUW,
17628 IX86_BUILTIN_PCOMLEUW,
17629 IX86_BUILTIN_PCOMGTUW,
17630 IX86_BUILTIN_PCOMGEUW,
17631 IX86_BUILTIN_PCOMFALSEUW,
17632 IX86_BUILTIN_PCOMTRUEUW,
17633 IX86_BUILTIN_PCOMEQUD,
17634 IX86_BUILTIN_PCOMNEUD,
17635 IX86_BUILTIN_PCOMLTUD,
17636 IX86_BUILTIN_PCOMLEUD,
17637 IX86_BUILTIN_PCOMGTUD,
17638 IX86_BUILTIN_PCOMGEUD,
17639 IX86_BUILTIN_PCOMFALSEUD,
17640 IX86_BUILTIN_PCOMTRUEUD,
17641 IX86_BUILTIN_PCOMEQUQ,
17642 IX86_BUILTIN_PCOMNEUQ,
17643 IX86_BUILTIN_PCOMLTUQ,
17644 IX86_BUILTIN_PCOMLEUQ,
17645 IX86_BUILTIN_PCOMGTUQ,
17646 IX86_BUILTIN_PCOMGEUQ,
17647 IX86_BUILTIN_PCOMFALSEUQ,
17648 IX86_BUILTIN_PCOMTRUEUQ,
17649
17650 IX86_BUILTIN_PCOMEQB,
17651 IX86_BUILTIN_PCOMNEB,
17652 IX86_BUILTIN_PCOMLTB,
17653 IX86_BUILTIN_PCOMLEB,
17654 IX86_BUILTIN_PCOMGTB,
17655 IX86_BUILTIN_PCOMGEB,
17656 IX86_BUILTIN_PCOMFALSEB,
17657 IX86_BUILTIN_PCOMTRUEB,
17658 IX86_BUILTIN_PCOMEQW,
17659 IX86_BUILTIN_PCOMNEW,
17660 IX86_BUILTIN_PCOMLTW,
17661 IX86_BUILTIN_PCOMLEW,
17662 IX86_BUILTIN_PCOMGTW,
17663 IX86_BUILTIN_PCOMGEW,
17664 IX86_BUILTIN_PCOMFALSEW,
17665 IX86_BUILTIN_PCOMTRUEW,
17666 IX86_BUILTIN_PCOMEQD,
17667 IX86_BUILTIN_PCOMNED,
17668 IX86_BUILTIN_PCOMLTD,
17669 IX86_BUILTIN_PCOMLED,
17670 IX86_BUILTIN_PCOMGTD,
17671 IX86_BUILTIN_PCOMGED,
17672 IX86_BUILTIN_PCOMFALSED,
17673 IX86_BUILTIN_PCOMTRUED,
17674 IX86_BUILTIN_PCOMEQQ,
17675 IX86_BUILTIN_PCOMNEQ,
17676 IX86_BUILTIN_PCOMLTQ,
17677 IX86_BUILTIN_PCOMLEQ,
17678 IX86_BUILTIN_PCOMGTQ,
17679 IX86_BUILTIN_PCOMGEQ,
17680 IX86_BUILTIN_PCOMFALSEQ,
17681 IX86_BUILTIN_PCOMTRUEQ,
17682
17683 IX86_BUILTIN_MAX
17684 };
17685
17686 /* Table for the ix86 builtin decls. */
17687 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17688
17689 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
17690  * only if ix86_isa_flags includes one of the bits in MASK.  Stores the
17691  * function decl in the ix86_builtins array.
17692  * Returns the function decl, or NULL_TREE if the builtin was not added.  */
17693
17694 static inline tree
17695 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17696 {
17697 tree decl = NULL_TREE;
17698
17699 if (mask & ix86_isa_flags
17700 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17701 {
17702 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17703 NULL, NULL_TREE);
17704 ix86_builtins[(int) code] = decl;
17705 }
17706
17707 return decl;
17708 }
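/* A typical use (illustrative; the actual calls are made from the builtin
   initialization code later in this file) looks like:

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups",
		  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);

   where the FUNCTION_TYPE node is a local of that initialization code.  */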
17709
17710 /* Like def_builtin, but also marks the function decl "const". */
17711
17712 static inline tree
17713 def_builtin_const (int mask, const char *name, tree type,
17714 enum ix86_builtins code)
17715 {
17716 tree decl = def_builtin (mask, name, type, code);
17717 if (decl)
17718 TREE_READONLY (decl) = 1;
17719 return decl;
17720 }
17721
17722 /* Bits for builtin_description.flag. */
17723
17724 /* Set when we don't support the comparison natively, and should
17725    swap the comparison operands in order to support it.  */
17726 #define BUILTIN_DESC_SWAP_OPERANDS 1
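/* For example, __builtin_ia32_cmpgtps in bdesc_2arg below is described as
   LT with BUILTIN_DESC_SWAP_OPERANDS set: a > b is expanded as b < a.  */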
17727
17728 struct builtin_description
17729 {
17730 const unsigned int mask;
17731 const enum insn_code icode;
17732 const char *const name;
17733 const enum ix86_builtins code;
17734 const enum rtx_code comparison;
17735 const int flag;
17736 };
17737
17738 static const struct builtin_description bdesc_comi[] =
17739 {
17740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17764 };
17765
17766 static const struct builtin_description bdesc_ptest[] =
17767 {
17768 /* SSE4.1 */
17769 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17770 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17771 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17772 };
17773
17774 static const struct builtin_description bdesc_pcmpestr[] =
17775 {
17776 /* SSE4.2 */
17777 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17778 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17779 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17780 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17781 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17782 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17783 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17784 };
17785
17786 static const struct builtin_description bdesc_pcmpistr[] =
17787 {
17788 /* SSE4.2 */
17789 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17790 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17794 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17795 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17796 };
17797
17798 static const struct builtin_description bdesc_crc32[] =
17799 {
17800 /* SSE4.2 */
17801   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17802   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17803   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17804   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17805 };
17806
17807 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0.  */
17808 static const struct builtin_description bdesc_sse_3arg[] =
17809 {
17810 /* SSE4.1 */
17811 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17812 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17813 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17814 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17815 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17816 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17817 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17818 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17819 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17820 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17821 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17822 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17823 };
17824
17825 static const struct builtin_description bdesc_2arg[] =
17826 {
17827 /* SSE */
17828 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17829 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17830 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17831 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17832 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17833 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17834 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17835 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17836
17837 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17838 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17843 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17846 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17849 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17850 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17859
17860 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17861 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17864
17865 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17867 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17868 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17869
17870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17875
17876 /* MMX */
17877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17879 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17880 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17883 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17884 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17885
17886 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17887 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17888 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17889 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17890 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17891 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17892 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17893 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17894
17895 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17896 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17897 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17898
17899 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17900 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17901 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17902 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17903
17904 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17905 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17906
17907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17913
17914 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17915 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17916 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17917 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17918
17919 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17920 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17923 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17925
17926 /* Special. */
17927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17930
17931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17933 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17934
17935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17941
17942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17948
17949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17953
17954 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17956
17957 /* SSE2 */
17958 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17960 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17966
17967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17987
17988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17992
17993 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17995 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17996 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17997
17998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18001
18002 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18003
18004 /* SSE2 MMX */
18005 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18006 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18007 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18009 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18010 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18011 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18012 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18013
18014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18022
18023 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18024 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18025
18026 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18028 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18029 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18030
18031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18033
18034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18040
18041 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18042 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18043 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18045
18046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18054
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18058
18059 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18061
18062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18064
18065 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18067 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18068
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18071 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18072
18073 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18074 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18075
18076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18077
18078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18082
18083 /* SSE3 */
18084 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18087 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18090
18091 /* SSSE3 */
18092 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18099 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18101 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18104 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18108 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18109 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18112 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18114 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18115 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18116
18117 /* SSE4.1 */
18118 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18120 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18127 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18128 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18130
18131 /* SSE4.2 */
18132 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18133 };
18134
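/* Builtins that take a single vector argument.  */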
18135 static const struct builtin_description bdesc_1arg[] =
18136 {
18137 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18139
18140 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18143
18144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18146 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18149 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18150
18151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18153
18154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18155
18156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18158
18159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18164
18165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18166
18167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18169 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18170 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18171
18172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18175
18176 /* SSE3 */
18177 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18178 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18179
18180 /* SSSE3 */
18181 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18182 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18183 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18184 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18186 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18187
18188 /* SSE4.1 */
18189 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18192 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18194 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18196 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18197 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18198 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18199 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18200 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18201 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18202
18203 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18204 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18205 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18206 };
18207
18208 /* SSE5 */
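/* The MULTI_ARG_* values below describe the operand layout of each SSE5
   builtin: the digit gives the number of operands and the mode names give
   the vector element modes (two modes where the operand and result modes
   differ).  Suffixes mark variants: _IMM for an immediate count operand,
   _CMP for comparisons, and _TF for the always-true/always-false forms.  */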
18209 enum multi_arg_type {
18210 MULTI_ARG_UNKNOWN,
18211 MULTI_ARG_3_SF,
18212 MULTI_ARG_3_DF,
18213 MULTI_ARG_3_DI,
18214 MULTI_ARG_3_SI,
18215 MULTI_ARG_3_SI_DI,
18216 MULTI_ARG_3_HI,
18217 MULTI_ARG_3_HI_SI,
18218 MULTI_ARG_3_QI,
18219 MULTI_ARG_3_PERMPS,
18220 MULTI_ARG_3_PERMPD,
18221 MULTI_ARG_2_SF,
18222 MULTI_ARG_2_DF,
18223 MULTI_ARG_2_DI,
18224 MULTI_ARG_2_SI,
18225 MULTI_ARG_2_HI,
18226 MULTI_ARG_2_QI,
18227 MULTI_ARG_2_DI_IMM,
18228 MULTI_ARG_2_SI_IMM,
18229 MULTI_ARG_2_HI_IMM,
18230 MULTI_ARG_2_QI_IMM,
18231 MULTI_ARG_2_SF_CMP,
18232 MULTI_ARG_2_DF_CMP,
18233 MULTI_ARG_2_DI_CMP,
18234 MULTI_ARG_2_SI_CMP,
18235 MULTI_ARG_2_HI_CMP,
18236 MULTI_ARG_2_QI_CMP,
18237 MULTI_ARG_2_DI_TF,
18238 MULTI_ARG_2_SI_TF,
18239 MULTI_ARG_2_HI_TF,
18240 MULTI_ARG_2_QI_TF,
18241 MULTI_ARG_2_SF_TF,
18242 MULTI_ARG_2_DF_TF,
18243 MULTI_ARG_1_SF,
18244 MULTI_ARG_1_DF,
18245 MULTI_ARG_1_DI,
18246 MULTI_ARG_1_SI,
18247 MULTI_ARG_1_HI,
18248 MULTI_ARG_1_QI,
18249 MULTI_ARG_1_SI_DI,
18250 MULTI_ARG_1_HI_DI,
18251 MULTI_ARG_1_HI_SI,
18252 MULTI_ARG_1_QI_DI,
18253 MULTI_ARG_1_QI_SI,
18254 MULTI_ARG_1_QI_HI,
18255 MULTI_ARG_1_PH2PS,
18256 MULTI_ARG_1_PS2PH
18257 };
18258
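/* SSE5 builtins with two or three operands.  The flag field carries the
   MULTI_ARG_* value describing the operand layout.  */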
18259 static const struct builtin_description bdesc_multi_arg[] =
18260 {
18261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18305 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18336
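/* SSE5 scalar and packed comparisons.  The comparison field selects the
   condition tested by each builtin.  */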
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18353
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18370
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18387
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18404
18405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18412
18413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18420
18421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18428
18429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18436
18437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18444
18445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18452
18453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18460
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18468
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18477
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18486
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18495 };
18496
18497 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18498 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18499 builtins. */
18500 static void
18501 ix86_init_mmx_sse_builtins (void)
18502 {
18503 const struct builtin_description * d;
18504 size_t i;
18505
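/* Vector types used in the builtin prototypes below.  */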
18506 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18507 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18508 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18509 tree V2DI_type_node
18510 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18511 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18512 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18513 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18514 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18515 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18516 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18517
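/* Pointer types used by the load/store builtins.  */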
18518 tree pchar_type_node = build_pointer_type (char_type_node);
18519 tree pcchar_type_node = build_pointer_type (
18520 build_type_variant (char_type_node, 1, 0));
18521 tree pfloat_type_node = build_pointer_type (float_type_node);
18522 tree pcfloat_type_node = build_pointer_type (
18523 build_type_variant (float_type_node, 1, 0));
18524 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18525 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18526 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18527
18528 /* Comparisons. */
18529 tree int_ftype_v4sf_v4sf
18530 = build_function_type_list (integer_type_node,
18531 V4SF_type_node, V4SF_type_node, NULL_TREE);
18532 tree v4si_ftype_v4sf_v4sf
18533 = build_function_type_list (V4SI_type_node,
18534 V4SF_type_node, V4SF_type_node, NULL_TREE);
18535 /* MMX/SSE/integer conversions. */
18536 tree int_ftype_v4sf
18537 = build_function_type_list (integer_type_node,
18538 V4SF_type_node, NULL_TREE);
18539 tree int64_ftype_v4sf
18540 = build_function_type_list (long_long_integer_type_node,
18541 V4SF_type_node, NULL_TREE);
18542 tree int_ftype_v8qi
18543 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18544 tree v4sf_ftype_v4sf_int
18545 = build_function_type_list (V4SF_type_node,
18546 V4SF_type_node, integer_type_node, NULL_TREE);
18547 tree v4sf_ftype_v4sf_int64
18548 = build_function_type_list (V4SF_type_node,
18549 V4SF_type_node, long_long_integer_type_node,
18550 NULL_TREE);
18551 tree v4sf_ftype_v4sf_v2si
18552 = build_function_type_list (V4SF_type_node,
18553 V4SF_type_node, V2SI_type_node, NULL_TREE);
18554
18555 /* Miscellaneous. */
18556 tree v8qi_ftype_v4hi_v4hi
18557 = build_function_type_list (V8QI_type_node,
18558 V4HI_type_node, V4HI_type_node, NULL_TREE);
18559 tree v4hi_ftype_v2si_v2si
18560 = build_function_type_list (V4HI_type_node,
18561 V2SI_type_node, V2SI_type_node, NULL_TREE);
18562 tree v4sf_ftype_v4sf_v4sf_int
18563 = build_function_type_list (V4SF_type_node,
18564 V4SF_type_node, V4SF_type_node,
18565 integer_type_node, NULL_TREE);
18566 tree v2si_ftype_v4hi_v4hi
18567 = build_function_type_list (V2SI_type_node,
18568 V4HI_type_node, V4HI_type_node, NULL_TREE);
18569 tree v4hi_ftype_v4hi_int
18570 = build_function_type_list (V4HI_type_node,
18571 V4HI_type_node, integer_type_node, NULL_TREE);
18572 tree v4hi_ftype_v4hi_di
18573 = build_function_type_list (V4HI_type_node,
18574 V4HI_type_node, long_long_unsigned_type_node,
18575 NULL_TREE);
18576 tree v2si_ftype_v2si_di
18577 = build_function_type_list (V2SI_type_node,
18578 V2SI_type_node, long_long_unsigned_type_node,
18579 NULL_TREE);
18580 tree void_ftype_void
18581 = build_function_type (void_type_node, void_list_node);
18582 tree void_ftype_unsigned
18583 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18584 tree void_ftype_unsigned_unsigned
18585 = build_function_type_list (void_type_node, unsigned_type_node,
18586 unsigned_type_node, NULL_TREE);
18587 tree void_ftype_pcvoid_unsigned_unsigned
18588 = build_function_type_list (void_type_node, const_ptr_type_node,
18589 unsigned_type_node, unsigned_type_node,
18590 NULL_TREE);
18591 tree unsigned_ftype_void
18592 = build_function_type (unsigned_type_node, void_list_node);
18593 tree v2si_ftype_v4sf
18594 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18595 /* Loads/stores. */
18596 tree void_ftype_v8qi_v8qi_pchar
18597 = build_function_type_list (void_type_node,
18598 V8QI_type_node, V8QI_type_node,
18599 pchar_type_node, NULL_TREE);
18600 tree v4sf_ftype_pcfloat
18601 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18602 /* @@@ the type is bogus */
18603 tree v4sf_ftype_v4sf_pv2si
18604 = build_function_type_list (V4SF_type_node,
18605 V4SF_type_node, pv2si_type_node, NULL_TREE);
18606 tree void_ftype_pv2si_v4sf
18607 = build_function_type_list (void_type_node,
18608 pv2si_type_node, V4SF_type_node, NULL_TREE);
18609 tree void_ftype_pfloat_v4sf
18610 = build_function_type_list (void_type_node,
18611 pfloat_type_node, V4SF_type_node, NULL_TREE);
18612 tree void_ftype_pdi_di
18613 = build_function_type_list (void_type_node,
18614 pdi_type_node, long_long_unsigned_type_node,
18615 NULL_TREE);
18616 tree void_ftype_pv2di_v2di
18617 = build_function_type_list (void_type_node,
18618 pv2di_type_node, V2DI_type_node, NULL_TREE);
18619 /* Normal vector unops. */
18620 tree v4sf_ftype_v4sf
18621 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18622 tree v16qi_ftype_v16qi
18623 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18624 tree v8hi_ftype_v8hi
18625 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18626 tree v4si_ftype_v4si
18627 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18628 tree v8qi_ftype_v8qi
18629 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18630 tree v4hi_ftype_v4hi
18631 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18632
18633 /* Normal vector binops. */
18634 tree v4sf_ftype_v4sf_v4sf
18635 = build_function_type_list (V4SF_type_node,
18636 V4SF_type_node, V4SF_type_node, NULL_TREE);
18637 tree v8qi_ftype_v8qi_v8qi
18638 = build_function_type_list (V8QI_type_node,
18639 V8QI_type_node, V8QI_type_node, NULL_TREE);
18640 tree v4hi_ftype_v4hi_v4hi
18641 = build_function_type_list (V4HI_type_node,
18642 V4HI_type_node, V4HI_type_node, NULL_TREE);
18643 tree v2si_ftype_v2si_v2si
18644 = build_function_type_list (V2SI_type_node,
18645 V2SI_type_node, V2SI_type_node, NULL_TREE);
18646 tree di_ftype_di_di
18647 = build_function_type_list (long_long_unsigned_type_node,
18648 long_long_unsigned_type_node,
18649 long_long_unsigned_type_node, NULL_TREE);
18650
18651 tree di_ftype_di_di_int
18652 = build_function_type_list (long_long_unsigned_type_node,
18653 long_long_unsigned_type_node,
18654 long_long_unsigned_type_node,
18655 integer_type_node, NULL_TREE);
18656
18657 tree v2si_ftype_v2sf
18658 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18659 tree v2sf_ftype_v2si
18660 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18661 tree v2si_ftype_v2si
18662 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18663 tree v2sf_ftype_v2sf
18664 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18665 tree v2sf_ftype_v2sf_v2sf
18666 = build_function_type_list (V2SF_type_node,
18667 V2SF_type_node, V2SF_type_node, NULL_TREE);
18668 tree v2si_ftype_v2sf_v2sf
18669 = build_function_type_list (V2SI_type_node,
18670 V2SF_type_node, V2SF_type_node, NULL_TREE);
18671 tree pint_type_node = build_pointer_type (integer_type_node);
18672 tree pdouble_type_node = build_pointer_type (double_type_node);
18673 tree pcdouble_type_node = build_pointer_type (
18674 build_type_variant (double_type_node, 1, 0));
18675 tree int_ftype_v2df_v2df
18676 = build_function_type_list (integer_type_node,
18677 V2DF_type_node, V2DF_type_node, NULL_TREE);
18678
18679 tree void_ftype_pcvoid
18680 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18681 tree v4sf_ftype_v4si
18682 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18683 tree v4si_ftype_v4sf
18684 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18685 tree v2df_ftype_v4si
18686 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18687 tree v4si_ftype_v2df
18688 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18689 tree v4si_ftype_v2df_v2df
18690 = build_function_type_list (V4SI_type_node,
18691 V2DF_type_node, V2DF_type_node, NULL_TREE);
18692 tree v2si_ftype_v2df
18693 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18694 tree v4sf_ftype_v2df
18695 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18696 tree v2df_ftype_v2si
18697 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18698 tree v2df_ftype_v4sf
18699 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18700 tree int_ftype_v2df
18701 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18702 tree int64_ftype_v2df
18703 = build_function_type_list (long_long_integer_type_node,
18704 V2DF_type_node, NULL_TREE);
18705 tree v2df_ftype_v2df_int
18706 = build_function_type_list (V2DF_type_node,
18707 V2DF_type_node, integer_type_node, NULL_TREE);
18708 tree v2df_ftype_v2df_int64
18709 = build_function_type_list (V2DF_type_node,
18710 V2DF_type_node, long_long_integer_type_node,
18711 NULL_TREE);
18712 tree v4sf_ftype_v4sf_v2df
18713 = build_function_type_list (V4SF_type_node,
18714 V4SF_type_node, V2DF_type_node, NULL_TREE);
18715 tree v2df_ftype_v2df_v4sf
18716 = build_function_type_list (V2DF_type_node,
18717 V2DF_type_node, V4SF_type_node, NULL_TREE);
18718 tree v2df_ftype_v2df_v2df_int
18719 = build_function_type_list (V2DF_type_node,
18720 V2DF_type_node, V2DF_type_node,
18721 integer_type_node,
18722 NULL_TREE);
18723 tree v2df_ftype_v2df_pcdouble
18724 = build_function_type_list (V2DF_type_node,
18725 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18726 tree void_ftype_pdouble_v2df
18727 = build_function_type_list (void_type_node,
18728 pdouble_type_node, V2DF_type_node, NULL_TREE);
18729 tree void_ftype_pint_int
18730 = build_function_type_list (void_type_node,
18731 pint_type_node, integer_type_node, NULL_TREE);
18732 tree void_ftype_v16qi_v16qi_pchar
18733 = build_function_type_list (void_type_node,
18734 V16QI_type_node, V16QI_type_node,
18735 pchar_type_node, NULL_TREE);
18736 tree v2df_ftype_pcdouble
18737 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18738 tree v2df_ftype_v2df_v2df
18739 = build_function_type_list (V2DF_type_node,
18740 V2DF_type_node, V2DF_type_node, NULL_TREE);
18741 tree v16qi_ftype_v16qi_v16qi
18742 = build_function_type_list (V16QI_type_node,
18743 V16QI_type_node, V16QI_type_node, NULL_TREE);
18744 tree v8hi_ftype_v8hi_v8hi
18745 = build_function_type_list (V8HI_type_node,
18746 V8HI_type_node, V8HI_type_node, NULL_TREE);
18747 tree v4si_ftype_v4si_v4si
18748 = build_function_type_list (V4SI_type_node,
18749 V4SI_type_node, V4SI_type_node, NULL_TREE);
18750 tree v2di_ftype_v2di_v2di
18751 = build_function_type_list (V2DI_type_node,
18752 V2DI_type_node, V2DI_type_node, NULL_TREE);
18753 tree v2di_ftype_v2df_v2df
18754 = build_function_type_list (V2DI_type_node,
18755 V2DF_type_node, V2DF_type_node, NULL_TREE);
18756 tree v2df_ftype_v2df
18757 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18758 tree v2di_ftype_v2di_int
18759 = build_function_type_list (V2DI_type_node,
18760 V2DI_type_node, integer_type_node, NULL_TREE);
18761 tree v2di_ftype_v2di_v2di_int
18762 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18763 V2DI_type_node, integer_type_node, NULL_TREE);
18764 tree v4si_ftype_v4si_int
18765 = build_function_type_list (V4SI_type_node,
18766 V4SI_type_node, integer_type_node, NULL_TREE);
18767 tree v8hi_ftype_v8hi_int
18768 = build_function_type_list (V8HI_type_node,
18769 V8HI_type_node, integer_type_node, NULL_TREE);
18770 tree v4si_ftype_v8hi_v8hi
18771 = build_function_type_list (V4SI_type_node,
18772 V8HI_type_node, V8HI_type_node, NULL_TREE);
18773 tree di_ftype_v8qi_v8qi
18774 = build_function_type_list (long_long_unsigned_type_node,
18775 V8QI_type_node, V8QI_type_node, NULL_TREE);
18776 tree di_ftype_v2si_v2si
18777 = build_function_type_list (long_long_unsigned_type_node,
18778 V2SI_type_node, V2SI_type_node, NULL_TREE);
18779 tree v2di_ftype_v16qi_v16qi
18780 = build_function_type_list (V2DI_type_node,
18781 V16QI_type_node, V16QI_type_node, NULL_TREE);
18782 tree v2di_ftype_v4si_v4si
18783 = build_function_type_list (V2DI_type_node,
18784 V4SI_type_node, V4SI_type_node, NULL_TREE);
18785 tree int_ftype_v16qi
18786 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18787 tree v16qi_ftype_pcchar
18788 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18789 tree void_ftype_pchar_v16qi
18790 = build_function_type_list (void_type_node,
18791 pchar_type_node, V16QI_type_node, NULL_TREE);
18792
18793 tree v2di_ftype_v2di_unsigned_unsigned
18794 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18795 unsigned_type_node, unsigned_type_node,
18796 NULL_TREE);
18797 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18798 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18799 unsigned_type_node, unsigned_type_node,
18800 NULL_TREE);
18801 tree v2di_ftype_v2di_v16qi
18802 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18803 NULL_TREE);
18804 tree v2df_ftype_v2df_v2df_v2df
18805 = build_function_type_list (V2DF_type_node,
18806 V2DF_type_node, V2DF_type_node,
18807 V2DF_type_node, NULL_TREE);
18808 tree v4sf_ftype_v4sf_v4sf_v4sf
18809 = build_function_type_list (V4SF_type_node,
18810 V4SF_type_node, V4SF_type_node,
18811 V4SF_type_node, NULL_TREE);
18812 tree v8hi_ftype_v16qi
18813 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18814 NULL_TREE);
18815 tree v4si_ftype_v16qi
18816 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18817 NULL_TREE);
18818 tree v2di_ftype_v16qi
18819 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18820 NULL_TREE);
18821 tree v4si_ftype_v8hi
18822 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18823 NULL_TREE);
18824 tree v2di_ftype_v8hi
18825 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18826 NULL_TREE);
18827 tree v2di_ftype_v4si
18828 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18829 NULL_TREE);
18830 tree v2di_ftype_pv2di
18831 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18832 NULL_TREE);
18833 tree v16qi_ftype_v16qi_v16qi_int
18834 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18835 V16QI_type_node, integer_type_node,
18836 NULL_TREE);
18837 tree v16qi_ftype_v16qi_v16qi_v16qi
18838 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18839 V16QI_type_node, V16QI_type_node,
18840 NULL_TREE);
18841 tree v8hi_ftype_v8hi_v8hi_int
18842 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18843 V8HI_type_node, integer_type_node,
18844 NULL_TREE);
18845 tree v4si_ftype_v4si_v4si_int
18846 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18847 V4SI_type_node, integer_type_node,
18848 NULL_TREE);
18849 tree int_ftype_v2di_v2di
18850 = build_function_type_list (integer_type_node,
18851 V2DI_type_node, V2DI_type_node,
18852 NULL_TREE);
18853 tree int_ftype_v16qi_int_v16qi_int_int
18854 = build_function_type_list (integer_type_node,
18855 V16QI_type_node,
18856 integer_type_node,
18857 V16QI_type_node,
18858 integer_type_node,
18859 integer_type_node,
18860 NULL_TREE);
18861 tree v16qi_ftype_v16qi_int_v16qi_int_int
18862 = build_function_type_list (V16QI_type_node,
18863 V16QI_type_node,
18864 integer_type_node,
18865 V16QI_type_node,
18866 integer_type_node,
18867 integer_type_node,
18868 NULL_TREE);
18869 tree int_ftype_v16qi_v16qi_int
18870 = build_function_type_list (integer_type_node,
18871 V16QI_type_node,
18872 V16QI_type_node,
18873 integer_type_node,
18874 NULL_TREE);
18875
18876 /* SSE5 instructions */
18877 tree v2di_ftype_v2di_v2di_v2di
18878 = build_function_type_list (V2DI_type_node,
18879 V2DI_type_node,
18880 V2DI_type_node,
18881 V2DI_type_node,
18882 NULL_TREE);
18883
18884 tree v4si_ftype_v4si_v4si_v4si
18885 = build_function_type_list (V4SI_type_node,
18886 V4SI_type_node,
18887 V4SI_type_node,
18888 V4SI_type_node,
18889 NULL_TREE);
18890
18891 tree v4si_ftype_v4si_v4si_v2di
18892 = build_function_type_list (V4SI_type_node,
18893 V4SI_type_node,
18894 V4SI_type_node,
18895 V2DI_type_node,
18896 NULL_TREE);
18897
18898 tree v8hi_ftype_v8hi_v8hi_v8hi
18899 = build_function_type_list (V8HI_type_node,
18900 V8HI_type_node,
18901 V8HI_type_node,
18902 V8HI_type_node,
18903 NULL_TREE);
18904
18905 tree v8hi_ftype_v8hi_v8hi_v4si
18906 = build_function_type_list (V8HI_type_node,
18907 V8HI_type_node,
18908 V8HI_type_node,
18909 V4SI_type_node,
18910 NULL_TREE);
18911
18912 tree v2df_ftype_v2df_v2df_v16qi
18913 = build_function_type_list (V2DF_type_node,
18914 V2DF_type_node,
18915 V2DF_type_node,
18916 V16QI_type_node,
18917 NULL_TREE);
18918
18919 tree v4sf_ftype_v4sf_v4sf_v16qi
18920 = build_function_type_list (V4SF_type_node,
18921 V4SF_type_node,
18922 V4SF_type_node,
18923 V16QI_type_node,
18924 NULL_TREE);
18925
18926 tree v2di_ftype_v2di_si
18927 = build_function_type_list (V2DI_type_node,
18928 V2DI_type_node,
18929 integer_type_node,
18930 NULL_TREE);
18931
18932 tree v4si_ftype_v4si_si
18933 = build_function_type_list (V4SI_type_node,
18934 V4SI_type_node,
18935 integer_type_node,
18936 NULL_TREE);
18937
18938 tree v8hi_ftype_v8hi_si
18939 = build_function_type_list (V8HI_type_node,
18940 V8HI_type_node,
18941 integer_type_node,
18942 NULL_TREE);
18943
18944 tree v16qi_ftype_v16qi_si
18945 = build_function_type_list (V16QI_type_node,
18946 V16QI_type_node,
18947 integer_type_node,
18948 NULL_TREE);
18949 tree v4sf_ftype_v4hi
18950 = build_function_type_list (V4SF_type_node,
18951 V4HI_type_node,
18952 NULL_TREE);
18953
18954 tree v4hi_ftype_v4sf
18955 = build_function_type_list (V4HI_type_node,
18956 V4SF_type_node,
18957 NULL_TREE);
18958
18959 tree v2di_ftype_v2di
18960 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18961
18962 tree ftype;
18963
18964 /* The __float80 type. */
18965 if (TYPE_MODE (long_double_type_node) == XFmode)
18966 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18967 "__float80");
18968 else
18969 {
18970 /* The __float80 type. */
18971 tree float80_type_node = make_node (REAL_TYPE);
18972
18973 TYPE_PRECISION (float80_type_node) = 80;
18974 layout_type (float80_type_node);
18975 (*lang_hooks.types.register_builtin_type) (float80_type_node,
18976 "__float80");
18977 }
18978
18979 if (TARGET_64BIT)
18980 {
18981 tree float128_type_node = make_node (REAL_TYPE);
18982
18983 TYPE_PRECISION (float128_type_node) = 128;
18984 layout_type (float128_type_node);
18985 (*lang_hooks.types.register_builtin_type) (float128_type_node,
18986 "__float128");
18987
18988 /* TFmode support builtins. */
18989 ftype = build_function_type (float128_type_node,
18990 void_list_node);
18991 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
18992
18993 ftype = build_function_type_list (float128_type_node,
18994 float128_type_node,
18995 NULL_TREE);
18996 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
18997
18998 ftype = build_function_type_list (float128_type_node,
18999 float128_type_node,
19000 float128_type_node,
19001 NULL_TREE);
19002 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19003 }
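
  /* As a usage sketch for the TFmode built-ins registered above (the
     wrapper name is invented; __float80/__float128 are the type names
     registered here, and the calls are only valid when compiling for
     x86-64):

	static __float128
	copy_magnitude (__float128 x, __float128 y)
	{
	  __float128 huge = __builtin_infq ();	/* quiet +Inf constant */
	  return x == x ? __builtin_copysignq (__builtin_fabsq (x), y) : huge;
	}
     */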
19004
19005 /* Add all SSE builtins that are more or less simple operations on
19006 three operands. */
19007 for (i = 0, d = bdesc_sse_3arg;
19008 i < ARRAY_SIZE (bdesc_sse_3arg);
19009 i++, d++)
19010 {
19011 /* Use one of the operands; the target can have a different mode for
19012 mask-generating compares. */
19013 enum machine_mode mode;
19014 tree type;
19015
19016 if (d->name == 0)
19017 continue;
19018 mode = insn_data[d->icode].operand[1].mode;
19019
19020 switch (mode)
19021 {
19022 case V16QImode:
19023 type = v16qi_ftype_v16qi_v16qi_int;
19024 break;
19025 case V8HImode:
19026 type = v8hi_ftype_v8hi_v8hi_int;
19027 break;
19028 case V4SImode:
19029 type = v4si_ftype_v4si_v4si_int;
19030 break;
19031 case V2DImode:
19032 type = v2di_ftype_v2di_v2di_int;
19033 break;
19034 case V2DFmode:
19035 type = v2df_ftype_v2df_v2df_int;
19036 break;
19037 case V4SFmode:
19038 type = v4sf_ftype_v4sf_v4sf_int;
19039 break;
19040 default:
19041 gcc_unreachable ();
19042 }
19043
19044 /* Override for variable blends. */
19045 switch (d->icode)
19046 {
19047 case CODE_FOR_sse4_1_blendvpd:
19048 type = v2df_ftype_v2df_v2df_v2df;
19049 break;
19050 case CODE_FOR_sse4_1_blendvps:
19051 type = v4sf_ftype_v4sf_v4sf_v4sf;
19052 break;
19053 case CODE_FOR_sse4_1_pblendvb:
19054 type = v16qi_ftype_v16qi_v16qi_v16qi;
19055 break;
19056 default:
19057 break;
19058 }
19059
19060 def_builtin_const (d->mask, d->name, type, d->code);
19061 }
19062
19063 /* Add all builtins that are more or less simple operations on two
19064 operands. */
19065 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19066 {
19067 /* Use one of the operands; the target can have a different mode for
19068 mask-generating compares. */
19069 enum machine_mode mode;
19070 tree type;
19071
19072 if (d->name == 0)
19073 continue;
19074 mode = insn_data[d->icode].operand[1].mode;
19075
19076 switch (mode)
19077 {
19078 case V16QImode:
19079 type = v16qi_ftype_v16qi_v16qi;
19080 break;
19081 case V8HImode:
19082 type = v8hi_ftype_v8hi_v8hi;
19083 break;
19084 case V4SImode:
19085 type = v4si_ftype_v4si_v4si;
19086 break;
19087 case V2DImode:
19088 type = v2di_ftype_v2di_v2di;
19089 break;
19090 case V2DFmode:
19091 type = v2df_ftype_v2df_v2df;
19092 break;
19093 case V4SFmode:
19094 type = v4sf_ftype_v4sf_v4sf;
19095 break;
19096 case V8QImode:
19097 type = v8qi_ftype_v8qi_v8qi;
19098 break;
19099 case V4HImode:
19100 type = v4hi_ftype_v4hi_v4hi;
19101 break;
19102 case V2SImode:
19103 type = v2si_ftype_v2si_v2si;
19104 break;
19105 case DImode:
19106 type = di_ftype_di_di;
19107 break;
19108
19109 default:
19110 gcc_unreachable ();
19111 }
19112
19113 /* Override for comparisons. */
19114 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19115 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19116 type = v4si_ftype_v4sf_v4sf;
19117
19118 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19119 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19120 type = v2di_ftype_v2df_v2df;
19121
19122 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19123 type = v4si_ftype_v2df_v2df;
19124
19125 def_builtin_const (d->mask, d->name, type, d->code);
19126 }
19127
19128 /* Add all builtins that are more or less simple operations on one operand. */
19129 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19130 {
19131 enum machine_mode mode;
19132 tree type;
19133
19134 if (d->name == 0)
19135 continue;
19136 mode = insn_data[d->icode].operand[1].mode;
19137
19138 switch (mode)
19139 {
19140 case V16QImode:
19141 type = v16qi_ftype_v16qi;
19142 break;
19143 case V8HImode:
19144 type = v8hi_ftype_v8hi;
19145 break;
19146 case V4SImode:
19147 type = v4si_ftype_v4si;
19148 break;
19149 case V2DFmode:
19150 type = v2df_ftype_v2df;
19151 break;
19152 case V4SFmode:
19153 type = v4sf_ftype_v4sf;
19154 break;
19155 case V8QImode:
19156 type = v8qi_ftype_v8qi;
19157 break;
19158 case V4HImode:
19159 type = v4hi_ftype_v4hi;
19160 break;
19161 case V2SImode:
19162 type = v2si_ftype_v2si;
19163 break;
19164
19165 default:
19166 gcc_unreachable ();
19167 }
19168
19169 def_builtin_const (d->mask, d->name, type, d->code);
19170 }
19171
19172 /* pcmpestr[im] insns. */
19173 for (i = 0, d = bdesc_pcmpestr;
19174 i < ARRAY_SIZE (bdesc_pcmpestr);
19175 i++, d++)
19176 {
19177 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19178 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19179 else
19180 ftype = int_ftype_v16qi_int_v16qi_int_int;
19181 def_builtin_const (d->mask, d->name, ftype, d->code);
19182 }
19183
19184 /* pcmpistr[im] insns. */
19185 for (i = 0, d = bdesc_pcmpistr;
19186 i < ARRAY_SIZE (bdesc_pcmpistr);
19187 i++, d++)
19188 {
19189 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19190 ftype = v16qi_ftype_v16qi_v16qi_int;
19191 else
19192 ftype = int_ftype_v16qi_v16qi_int;
19193 def_builtin_const (d->mask, d->name, ftype, d->code);
19194 }
19195
19196 /* Add the remaining MMX insns with somewhat more complicated types. */
19197 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19198 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19199 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19200 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19201
19202 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19203 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19204 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19205
19206 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19207 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19208
19209 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19210 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19211
19212 /* comi/ucomi insns. */
19213 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19214 if (d->mask == OPTION_MASK_ISA_SSE2)
19215 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19216 else
19217 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19218
19219 /* ptest insns. */
19220 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19221 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19222
19223 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19224 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19225 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19226
19227 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19228 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19229 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19230 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19231 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19232 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19233 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19234 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19235 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19236 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19237 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19238
19239 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19240
19241 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19242 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19243
19244 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19245 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19246 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19247 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19248
19249 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19250 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19251 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19252 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19253
19254 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19255
19256 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19257
19258 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19259 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19260 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19261 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19262 ftype = build_function_type_list (float_type_node,
19263 float_type_node,
19264 NULL_TREE);
19265 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19266 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19267 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19268
19269 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19270
19271 /* Original 3DNow! */
19272 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19273 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19274 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19275 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19276 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19277 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19278 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19279 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19280 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19281 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19282 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19283 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19284 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19285 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19286 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19287 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19288 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19289 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19290 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19291 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19292
19293 /* 3DNow! extension as used in the Athlon CPU. */
19294 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19295 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19296 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19297 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19298 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19299 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19300
19301 /* SSE2 */
19302 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19303
19304 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19305 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19306
19307 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19308 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19309
19310 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19311 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19312 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19313 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19314 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19315
19316 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19317 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19318 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19319 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19320
19321 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19322 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19323
19324 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19325
19326 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19327 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19328
19329 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19330 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19331 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19332 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19333 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19334
19335 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19336
19337 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19338 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19339 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19340 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19341
19342 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19343 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19344 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19345
19346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19347 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19348 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19349 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19350
19351 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19352 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19353 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19354
19355 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19356 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19357
19358 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19360
19361 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19363 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19364 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19366 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19368
19369 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19371 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19373 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19374 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
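
  /* As a usage sketch for the whole-register byte shifts just above (the
     wrapper name is invented; the factor of eight mirrors the way
     <emmintrin.h> builds _mm_srli_si128, since the built-in takes the
     shift count in bits even though the instruction shifts bytes):

	typedef long long __v2di __attribute__ ((__vector_size__ (16)));

	static __v2di
	shift_right_one_byte (__v2di x)
	{
	  return __builtin_ia32_psrldqi128 (x, 1 * 8);
	}
     */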
19376
19377 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19378 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19380 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19381
19382 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19383
19384 /* Prescott New Instructions. */
19385 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19386 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19387 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19388
19389 /* SSSE3. */
19390 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19391 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19392
19393 /* SSE4.1. */
19394 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19395 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19396 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19397 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19398 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19399 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19400 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19401 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19402 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19403 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19404 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19405 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19406 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19407 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19408
19409 /* SSE4.1 and SSE5 */
19410 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19411 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19412 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19413 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19414
19415 /* SSE4.2. */
19416 ftype = build_function_type_list (unsigned_type_node,
19417 unsigned_type_node,
19418 unsigned_char_type_node,
19419 NULL_TREE);
19420 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19421 ftype = build_function_type_list (unsigned_type_node,
19422 unsigned_type_node,
19423 short_unsigned_type_node,
19424 NULL_TREE);
19425 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19426 ftype = build_function_type_list (unsigned_type_node,
19427 unsigned_type_node,
19428 unsigned_type_node,
19429 NULL_TREE);
19430 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19431 ftype = build_function_type_list (long_long_unsigned_type_node,
19432 long_long_unsigned_type_node,
19433 long_long_unsigned_type_node,
19434 NULL_TREE);
19435 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
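
  /* As a usage sketch for the CRC32 built-ins just above (the helper
     name is invented; requires -msse4.2):

	static unsigned int
	crc32_bytes (unsigned int crc, const unsigned char *p, long n)
	{
	  while (n-- > 0)
	    crc = __builtin_ia32_crc32qi (crc, *p++);
	  return crc;
	}
     */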
19436
19437 /* AMDFAM10 SSE4A new built-ins. */
19438 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19439 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19440 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19441 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19442 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19443 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19444
19445 /* Access to the vec_init patterns. */
19446 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19447 integer_type_node, NULL_TREE);
19448 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19449
19450 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19451 short_integer_type_node,
19452 short_integer_type_node,
19453 short_integer_type_node, NULL_TREE);
19454 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19455
19456 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19457 char_type_node, char_type_node,
19458 char_type_node, char_type_node,
19459 char_type_node, char_type_node,
19460 char_type_node, NULL_TREE);
19461 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
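
  /* As a usage sketch for the vec_init built-ins just above (the
     wrapper name is invented; requires -mmmx and GCC's vector_size
     extension for the result type):

	typedef int __v2si __attribute__ ((__vector_size__ (8)));

	static __v2si
	pack_two_ints (int a, int b)
	{
	  return __builtin_ia32_vec_init_v2si (a, b);
	}
     */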
19462
19463 /* Access to the vec_extract patterns. */
19464 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19465 integer_type_node, NULL_TREE);
19466 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19467
19468 ftype = build_function_type_list (long_long_integer_type_node,
19469 V2DI_type_node, integer_type_node,
19470 NULL_TREE);
19471 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19472
19473 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19474 integer_type_node, NULL_TREE);
19475 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19476
19477 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19478 integer_type_node, NULL_TREE);
19479 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19480
19481 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19482 integer_type_node, NULL_TREE);
19483 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19484
19485 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19486 integer_type_node, NULL_TREE);
19487 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19488
19489 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19490 integer_type_node, NULL_TREE);
19491 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19492
19493 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19494 integer_type_node, NULL_TREE);
19495 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
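
  /* As a usage sketch for the vec_extract built-ins just above (the
     wrapper name is invented; requires -msse and GCC's vector_size
     extension, and the lane selector must be an integer constant):

	typedef float __v4sf __attribute__ ((__vector_size__ (16)));

	static float
	lowest_lane (__v4sf v)
	{
	  return __builtin_ia32_vec_ext_v4sf (v, 0);
	}
     */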
19496
19497 /* Access to the vec_set patterns. */
19498 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19499 intDI_type_node,
19500 integer_type_node, NULL_TREE);
19501 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19502
19503 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19504 float_type_node,
19505 integer_type_node, NULL_TREE);
19506 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19507
19508 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19509 intSI_type_node,
19510 integer_type_node, NULL_TREE);
19511 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19512
19513 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19514 intHI_type_node,
19515 integer_type_node, NULL_TREE);
19516 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19517
19518 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19519 intHI_type_node,
19520 integer_type_node, NULL_TREE);
19521 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19522
19523 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19524 intQI_type_node,
19525 integer_type_node, NULL_TREE);
19526 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
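
  /* As a usage sketch for the vec_set built-ins just above (the
     wrapper name is invented; requires -msse2 and GCC's vector_size
     extension, and the lane index must be an integer constant):

	typedef short __v8hi __attribute__ ((__vector_size__ (16)));

	static __v8hi
	replace_lane_three (__v8hi v, short x)
	{
	  return __builtin_ia32_vec_set_v8hi (v, x, 3);
	}
     */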
19527
19528 /* Add SSE5 multi-arg instructions. */
19529 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19530 {
19531 tree mtype = NULL_TREE;
19532
19533 if (d->name == 0)
19534 continue;
19535
19536 switch ((enum multi_arg_type)d->flag)
19537 {
19538 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19539 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19540 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19541 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19542 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19543 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19544 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19545 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19546 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19547 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19548 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19549 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19550 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19551 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19552 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19553 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19554 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19555 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19556 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19557 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19558 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19559 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19560 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19561 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19562 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19563 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19564 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19565 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19566 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19567 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19568 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19569 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19570 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19571 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19572 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19573 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19574 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19575 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19576 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19577 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19578 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19579 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19580 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19581 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19582 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19583 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19584 case MULTI_ARG_UNKNOWN:
19585 default:
19586 gcc_unreachable ();
19587 }
19588
19589 if (mtype)
19590 def_builtin_const (d->mask, d->name, mtype, d->code);
19591 }
19592 }
19593
19594 static void
19595 ix86_init_builtins (void)
19596 {
19597 if (TARGET_MMX)
19598 ix86_init_mmx_sse_builtins ();
19599 }
19600
19601 /* Errors in the source file can cause expand_expr to return const0_rtx
19602 where we expect a vector. To avoid crashing, use one of the vector
19603 clear instructions. */
19604 static rtx
19605 safe_vector_operand (rtx x, enum machine_mode mode)
19606 {
19607 if (x == const0_rtx)
19608 x = CONST0_RTX (mode);
19609 return x;
19610 }
19611
19612 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19613 4 operands. The third argument must be an 8-bit immediate (a 4-bit
19614 immediate for the round insns) or xmm0. */
19615
19616 static rtx
19617 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19618 rtx target)
19619 {
19620 rtx pat;
19621 tree arg0 = CALL_EXPR_ARG (exp, 0);
19622 tree arg1 = CALL_EXPR_ARG (exp, 1);
19623 tree arg2 = CALL_EXPR_ARG (exp, 2);
19624 rtx op0 = expand_normal (arg0);
19625 rtx op1 = expand_normal (arg1);
19626 rtx op2 = expand_normal (arg2);
19627 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19628 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19629 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19630 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19631
19632 if (VECTOR_MODE_P (mode1))
19633 op0 = safe_vector_operand (op0, mode1);
19634 if (VECTOR_MODE_P (mode2))
19635 op1 = safe_vector_operand (op1, mode2);
19636 if (VECTOR_MODE_P (mode3))
19637 op2 = safe_vector_operand (op2, mode3);
19638
19639 if (optimize
19640 || target == 0
19641 || GET_MODE (target) != tmode
19642 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19643 target = gen_reg_rtx (tmode);
19644
19645 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19646 op0 = copy_to_mode_reg (mode1, op0);
19647 if ((optimize && !register_operand (op1, mode2))
19648 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19649 op1 = copy_to_mode_reg (mode2, op1);
19650
19651 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19652 switch (icode)
19653 {
19654 case CODE_FOR_sse4_1_blendvpd:
19655 case CODE_FOR_sse4_1_blendvps:
19656 case CODE_FOR_sse4_1_pblendvb:
19657 op2 = copy_to_mode_reg (mode3, op2);
19658 break;
19659
19660 case CODE_FOR_sse4_1_roundsd:
19661 case CODE_FOR_sse4_1_roundss:
19662 error ("the third argument must be a 4-bit immediate");
19663 return const0_rtx;
19664
19665 default:
19666 error ("the third argument must be an 8-bit immediate");
19667 return const0_rtx;
19668 }
19669
19670 pat = GEN_FCN (icode) (target, op0, op1, op2);
19671 if (! pat)
19672 return 0;
19673 emit_insn (pat);
19674 return target;
19675 }
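
/* As a caller-side sketch of the operand checks above (the wrapper name
   is invented; requires SSE4.1 or SSE5 and GCC's vector_size extension).
   The last operand must be a literal constant, so anything else reaches
   the error paths in the switch above:

     typedef double __v2df __attribute__ ((__vector_size__ (16)));

     static __v2df
     round_toward_neg_inf (__v2df a, __v2df b)
     {
       return __builtin_ia32_roundsd (a, b, 0x1);
     }
   */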
19676
19677 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19678
19679 static rtx
19680 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19681 {
19682 rtx pat;
19683 tree arg0 = CALL_EXPR_ARG (exp, 0);
19684 tree arg1 = CALL_EXPR_ARG (exp, 1);
19685 rtx op0 = expand_normal (arg0);
19686 rtx op1 = expand_normal (arg1);
19687 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19688 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19689 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19690
19691 if (optimize
19692 || !target
19693 || GET_MODE (target) != tmode
19694 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19695 target = gen_reg_rtx (tmode);
19696
19697 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19698 op0 = copy_to_mode_reg (mode0, op0);
19699 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19700 {
19701 op1 = copy_to_reg (op1);
19702 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19703 }
19704
19705 pat = GEN_FCN (icode) (target, op0, op1);
19706 if (! pat)
19707 return 0;
19708 emit_insn (pat);
19709 return target;
19710 }
19711
19712 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19713
19714 static rtx
19715 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19716 {
19717 rtx pat, xops[3];
19718 tree arg0 = CALL_EXPR_ARG (exp, 0);
19719 tree arg1 = CALL_EXPR_ARG (exp, 1);
19720 rtx op0 = expand_normal (arg0);
19721 rtx op1 = expand_normal (arg1);
19722 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19723 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19724 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19725
19726 if (VECTOR_MODE_P (mode0))
19727 op0 = safe_vector_operand (op0, mode0);
19728 if (VECTOR_MODE_P (mode1))
19729 op1 = safe_vector_operand (op1, mode1);
19730
19731 if (optimize || !target
19732 || GET_MODE (target) != tmode
19733 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19734 target = gen_reg_rtx (tmode);
19735
19736 if (GET_MODE (op1) == SImode && mode1 == TImode)
19737 {
19738 rtx x = gen_reg_rtx (V4SImode);
19739 emit_insn (gen_sse2_loadd (x, op1));
19740 op1 = gen_lowpart (TImode, x);
19741 }
19742
19743 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19744 op0 = copy_to_mode_reg (mode0, op0);
19745 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19746 op1 = copy_to_mode_reg (mode1, op1);
19747
19748 /* ??? Using ix86_fixup_binary_operands is problematic when
19749 we've got mismatched modes. Fake it. */
19750
19751 xops[0] = target;
19752 xops[1] = op0;
19753 xops[2] = op1;
19754
19755 if (tmode == mode0 && tmode == mode1)
19756 {
19757 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19758 op0 = xops[1];
19759 op1 = xops[2];
19760 }
19761 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19762 {
19763 op0 = force_reg (mode0, op0);
19764 op1 = force_reg (mode1, op1);
19765 target = gen_reg_rtx (tmode);
19766 }
19767
19768 pat = GEN_FCN (icode) (target, op0, op1);
19769 if (! pat)
19770 return 0;
19771 emit_insn (pat);
19772 return target;
19773 }
19774
19775 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19776
19777 static rtx
19778 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19779 enum multi_arg_type m_type,
19780 enum insn_code sub_code)
19781 {
19782 rtx pat;
19783 int i;
19784 int nargs;
19785 bool comparison_p = false;
19786 bool tf_p = false;
19787 bool last_arg_constant = false;
19788 int num_memory = 0;
19789 struct {
19790 rtx op;
19791 enum machine_mode mode;
19792 } args[4];
19793
19794 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19795
19796 switch (m_type)
19797 {
19798 case MULTI_ARG_3_SF:
19799 case MULTI_ARG_3_DF:
19800 case MULTI_ARG_3_DI:
19801 case MULTI_ARG_3_SI:
19802 case MULTI_ARG_3_SI_DI:
19803 case MULTI_ARG_3_HI:
19804 case MULTI_ARG_3_HI_SI:
19805 case MULTI_ARG_3_QI:
19806 case MULTI_ARG_3_PERMPS:
19807 case MULTI_ARG_3_PERMPD:
19808 nargs = 3;
19809 break;
19810
19811 case MULTI_ARG_2_SF:
19812 case MULTI_ARG_2_DF:
19813 case MULTI_ARG_2_DI:
19814 case MULTI_ARG_2_SI:
19815 case MULTI_ARG_2_HI:
19816 case MULTI_ARG_2_QI:
19817 nargs = 2;
19818 break;
19819
19820 case MULTI_ARG_2_DI_IMM:
19821 case MULTI_ARG_2_SI_IMM:
19822 case MULTI_ARG_2_HI_IMM:
19823 case MULTI_ARG_2_QI_IMM:
19824 nargs = 2;
19825 last_arg_constant = true;
19826 break;
19827
19828 case MULTI_ARG_1_SF:
19829 case MULTI_ARG_1_DF:
19830 case MULTI_ARG_1_DI:
19831 case MULTI_ARG_1_SI:
19832 case MULTI_ARG_1_HI:
19833 case MULTI_ARG_1_QI:
19834 case MULTI_ARG_1_SI_DI:
19835 case MULTI_ARG_1_HI_DI:
19836 case MULTI_ARG_1_HI_SI:
19837 case MULTI_ARG_1_QI_DI:
19838 case MULTI_ARG_1_QI_SI:
19839 case MULTI_ARG_1_QI_HI:
19840 case MULTI_ARG_1_PH2PS:
19841 case MULTI_ARG_1_PS2PH:
19842 nargs = 1;
19843 break;
19844
19845 case MULTI_ARG_2_SF_CMP:
19846 case MULTI_ARG_2_DF_CMP:
19847 case MULTI_ARG_2_DI_CMP:
19848 case MULTI_ARG_2_SI_CMP:
19849 case MULTI_ARG_2_HI_CMP:
19850 case MULTI_ARG_2_QI_CMP:
19851 nargs = 2;
19852 comparison_p = true;
19853 break;
19854
19855 case MULTI_ARG_2_SF_TF:
19856 case MULTI_ARG_2_DF_TF:
19857 case MULTI_ARG_2_DI_TF:
19858 case MULTI_ARG_2_SI_TF:
19859 case MULTI_ARG_2_HI_TF:
19860 case MULTI_ARG_2_QI_TF:
19861 nargs = 2;
19862 tf_p = true;
19863 break;
19864
19865 case MULTI_ARG_UNKNOWN:
19866 default:
19867 gcc_unreachable ();
19868 }
19869
19870 if (optimize || !target
19871 || GET_MODE (target) != tmode
19872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19873 target = gen_reg_rtx (tmode);
19874
19875 gcc_assert (nargs <= 4);
19876
19877 for (i = 0; i < nargs; i++)
19878 {
19879 tree arg = CALL_EXPR_ARG (exp, i);
19880 rtx op = expand_normal (arg);
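/* For the comparison forms, insn operand 1 is the comparison operator
   itself, so call argument I maps to insn operand I + 2 rather than
   I + 1. */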
19881 int adjust = (comparison_p) ? 1 : 0;
19882 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19883
19884 if (last_arg_constant && i == nargs-1)
19885 {
19886 if (GET_CODE (op) != CONST_INT)
19887 {
19888 error ("last argument must be an immediate");
19889 return gen_reg_rtx (tmode);
19890 }
19891 }
19892 else
19893 {
19894 if (VECTOR_MODE_P (mode))
19895 op = safe_vector_operand (op, mode);
19896
19897 /* If we aren't optimizing, only allow one memory operand to be
19898 generated. */
19899 if (memory_operand (op, mode))
19900 num_memory++;
19901
19902 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19903
19904 if (optimize
19905 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19906 || num_memory > 1)
19907 op = force_reg (mode, op);
19908 }
19909
19910 args[i].op = op;
19911 args[i].mode = mode;
19912 }
19913
19914 switch (nargs)
19915 {
19916 case 1:
19917 pat = GEN_FCN (icode) (target, args[0].op);
19918 break;
19919
19920 case 2:
19921 if (tf_p)
19922 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19923 GEN_INT ((int)sub_code));
19924 else if (! comparison_p)
19925 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19926 else
19927 {
19928 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19929 args[0].op,
19930 args[1].op);
19931
19932 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19933 }
19934 break;
19935
19936 case 3:
19937 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19938 break;
19939
19940 default:
19941 gcc_unreachable ();
19942 }
19943
19944 if (! pat)
19945 return 0;
19946
19947 emit_insn (pat);
19948 return target;
19949 }
19950
19951 /* Subroutine of ix86_expand_builtin to take care of stores. */
19952
19953 static rtx
19954 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19955 {
19956 rtx pat;
19957 tree arg0 = CALL_EXPR_ARG (exp, 0);
19958 tree arg1 = CALL_EXPR_ARG (exp, 1);
19959 rtx op0 = expand_normal (arg0);
19960 rtx op1 = expand_normal (arg1);
19961 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19962 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19963
19964 if (VECTOR_MODE_P (mode1))
19965 op1 = safe_vector_operand (op1, mode1);
19966
19967 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19968 op1 = copy_to_mode_reg (mode1, op1);
19969
19970 pat = GEN_FCN (icode) (op0, op1);
19971 if (pat)
19972 emit_insn (pat);
19973 return 0;
19974 }
19975
19976 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
19977
19978 static rtx
19979 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
19980 rtx target, int do_load)
19981 {
19982 rtx pat;
19983 tree arg0 = CALL_EXPR_ARG (exp, 0);
19984 rtx op0 = expand_normal (arg0);
19985 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19986 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19987
19988 if (optimize || !target
19989 || GET_MODE (target) != tmode
19990 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19991 target = gen_reg_rtx (tmode);
19992 if (do_load)
19993 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19994 else
19995 {
19996 if (VECTOR_MODE_P (mode0))
19997 op0 = safe_vector_operand (op0, mode0);
19998
19999 if ((optimize && !register_operand (op0, mode0))
20000 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20001 op0 = copy_to_mode_reg (mode0, op0);
20002 }
20003
20004 switch (icode)
20005 {
20006 case CODE_FOR_sse4_1_roundpd:
20007 case CODE_FOR_sse4_1_roundps:
20008 {
20009 tree arg1 = CALL_EXPR_ARG (exp, 1);
20010 rtx op1 = expand_normal (arg1);
20011 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20012
20013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20014 {
20015 error ("the second argument must be a 4-bit immediate");
20016 return const0_rtx;
20017 }
20018 pat = GEN_FCN (icode) (target, op0, op1);
20019 }
20020 break;
20021 default:
20022 pat = GEN_FCN (icode) (target, op0);
20023 break;
20024 }
20025
20026 if (! pat)
20027 return 0;
20028 emit_insn (pat);
20029 return target;
20030 }
20031
20032 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20033 sqrtss, rsqrtss, rcpss. */
20034
20035 static rtx
20036 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20037 {
20038 rtx pat;
20039 tree arg0 = CALL_EXPR_ARG (exp, 0);
20040 rtx op1, op0 = expand_normal (arg0);
20041 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20042 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20043
20044 if (optimize || !target
20045 || GET_MODE (target) != tmode
20046 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20047 target = gen_reg_rtx (tmode);
20048
20049 if (VECTOR_MODE_P (mode0))
20050 op0 = safe_vector_operand (op0, mode0);
20051
20052 if ((optimize && !register_operand (op0, mode0))
20053 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20054 op0 = copy_to_mode_reg (mode0, op0);
20055
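/* The scalar vm patterns take the source twice: the low element is
   computed from one copy and the upper elements pass through from the
   other. */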
20056 op1 = op0;
20057 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20058 op1 = copy_to_mode_reg (mode0, op1);
20059
20060 pat = GEN_FCN (icode) (target, op0, op1);
20061 if (! pat)
20062 return 0;
20063 emit_insn (pat);
20064 return target;
20065 }
20066
20067 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20068
20069 static rtx
20070 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20071 rtx target)
20072 {
20073 rtx pat;
20074 tree arg0 = CALL_EXPR_ARG (exp, 0);
20075 tree arg1 = CALL_EXPR_ARG (exp, 1);
20076 rtx op0 = expand_normal (arg0);
20077 rtx op1 = expand_normal (arg1);
20078 rtx op2;
20079 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20080 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20081 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20082 enum rtx_code comparison = d->comparison;
20083
20084 if (VECTOR_MODE_P (mode0))
20085 op0 = safe_vector_operand (op0, mode0);
20086 if (VECTOR_MODE_P (mode1))
20087 op1 = safe_vector_operand (op1, mode1);
20088
20089 /* Swap operands if we have a comparison that isn't available in
20090 hardware. */
20091 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20092 {
20093 rtx tmp = gen_reg_rtx (mode1);
20094 emit_move_insn (tmp, op1);
20095 op1 = op0;
20096 op0 = tmp;
20097 }
20098
20099 if (optimize || !target
20100 || GET_MODE (target) != tmode
20101 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20102 target = gen_reg_rtx (tmode);
20103
20104 if ((optimize && !register_operand (op0, mode0))
20105 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20106 op0 = copy_to_mode_reg (mode0, op0);
20107 if ((optimize && !register_operand (op1, mode1))
20108 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20109 op1 = copy_to_mode_reg (mode1, op1);
20110
20111 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20112 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20113 if (! pat)
20114 return 0;
20115 emit_insn (pat);
20116 return target;
20117 }
20118
20119 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20120
20121 static rtx
20122 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20123 rtx target)
20124 {
20125 rtx pat;
20126 tree arg0 = CALL_EXPR_ARG (exp, 0);
20127 tree arg1 = CALL_EXPR_ARG (exp, 1);
20128 rtx op0 = expand_normal (arg0);
20129 rtx op1 = expand_normal (arg1);
20130 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20131 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20132 enum rtx_code comparison = d->comparison;
20133
20134 if (VECTOR_MODE_P (mode0))
20135 op0 = safe_vector_operand (op0, mode0);
20136 if (VECTOR_MODE_P (mode1))
20137 op1 = safe_vector_operand (op1, mode1);
20138
20139 /* Swap operands if we have a comparison that isn't available in
20140 hardware. */
20141 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20142 {
20143 rtx tmp = op1;
20144 op1 = op0;
20145 op0 = tmp;
20146 }
20147
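/* Build the 0/1 result in the low byte of a fresh SImode register:
   clear it first, then set its QImode low part from the flags
   comparison emitted below. */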
20148 target = gen_reg_rtx (SImode);
20149 emit_move_insn (target, const0_rtx);
20150 target = gen_rtx_SUBREG (QImode, target, 0);
20151
20152 if ((optimize && !register_operand (op0, mode0))
20153 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20154 op0 = copy_to_mode_reg (mode0, op0);
20155 if ((optimize && !register_operand (op1, mode1))
20156 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20157 op1 = copy_to_mode_reg (mode1, op1);
20158
20159 pat = GEN_FCN (d->icode) (op0, op1);
20160 if (! pat)
20161 return 0;
20162 emit_insn (pat);
20163 emit_insn (gen_rtx_SET (VOIDmode,
20164 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20165 gen_rtx_fmt_ee (comparison, QImode,
20166 SET_DEST (pat),
20167 const0_rtx)));
20168
20169 return SUBREG_REG (target);
20170 }
20171
20172 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20173
20174 static rtx
20175 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20176 rtx target)
20177 {
20178 rtx pat;
20179 tree arg0 = CALL_EXPR_ARG (exp, 0);
20180 tree arg1 = CALL_EXPR_ARG (exp, 1);
20181 rtx op0 = expand_normal (arg0);
20182 rtx op1 = expand_normal (arg1);
20183 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20184 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20185 enum rtx_code comparison = d->comparison;
20186
20187 if (VECTOR_MODE_P (mode0))
20188 op0 = safe_vector_operand (op0, mode0);
20189 if (VECTOR_MODE_P (mode1))
20190 op1 = safe_vector_operand (op1, mode1);
20191
20192 target = gen_reg_rtx (SImode);
20193 emit_move_insn (target, const0_rtx);
20194 target = gen_rtx_SUBREG (QImode, target, 0);
20195
20196 if ((optimize && !register_operand (op0, mode0))
20197 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20198 op0 = copy_to_mode_reg (mode0, op0);
20199 if ((optimize && !register_operand (op1, mode1))
20200 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20201 op1 = copy_to_mode_reg (mode1, op1);
20202
20203 pat = GEN_FCN (d->icode) (op0, op1);
20204 if (! pat)
20205 return 0;
20206 emit_insn (pat);
20207 emit_insn (gen_rtx_SET (VOIDmode,
20208 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20209 gen_rtx_fmt_ee (comparison, QImode,
20210 SET_DEST (pat),
20211 const0_rtx)));
20212
20213 return SUBREG_REG (target);
20214 }
20215
20216 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20217
20218 static rtx
20219 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20220 tree exp, rtx target)
20221 {
20222 rtx pat;
20223 tree arg0 = CALL_EXPR_ARG (exp, 0);
20224 tree arg1 = CALL_EXPR_ARG (exp, 1);
20225 tree arg2 = CALL_EXPR_ARG (exp, 2);
20226 tree arg3 = CALL_EXPR_ARG (exp, 3);
20227 tree arg4 = CALL_EXPR_ARG (exp, 4);
20228 rtx scratch0, scratch1;
20229 rtx op0 = expand_normal (arg0);
20230 rtx op1 = expand_normal (arg1);
20231 rtx op2 = expand_normal (arg2);
20232 rtx op3 = expand_normal (arg3);
20233 rtx op4 = expand_normal (arg4);
20234 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20235
20236 tmode0 = insn_data[d->icode].operand[0].mode;
20237 tmode1 = insn_data[d->icode].operand[1].mode;
20238 modev2 = insn_data[d->icode].operand[2].mode;
20239 modei3 = insn_data[d->icode].operand[3].mode;
20240 modev4 = insn_data[d->icode].operand[4].mode;
20241 modei5 = insn_data[d->icode].operand[5].mode;
20242 modeimm = insn_data[d->icode].operand[6].mode;
20243
20244 if (VECTOR_MODE_P (modev2))
20245 op0 = safe_vector_operand (op0, modev2);
20246 if (VECTOR_MODE_P (modev4))
20247 op2 = safe_vector_operand (op2, modev4);
20248
20249 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20250 op0 = copy_to_mode_reg (modev2, op0);
20251 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20252 op1 = copy_to_mode_reg (modei3, op1);
20253 if ((optimize && !register_operand (op2, modev4))
20254 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20255 op2 = copy_to_mode_reg (modev4, op2);
20256 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20257 op3 = copy_to_mode_reg (modei5, op3);
20258
20259 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20260 {
20261 error ("the fifth argument must be an 8-bit immediate");
20262 return const0_rtx;
20263 }
20264
20265 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20266 {
20267 if (optimize || !target
20268 || GET_MODE (target) != tmode0
20269 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20270 target = gen_reg_rtx (tmode0);
20271
20272 scratch1 = gen_reg_rtx (tmode1);
20273
20274 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20275 }
20276 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20277 {
20278 if (optimize || !target
20279 || GET_MODE (target) != tmode1
20280 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20281 target = gen_reg_rtx (tmode1);
20282
20283 scratch0 = gen_reg_rtx (tmode0);
20284
20285 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20286 }
20287 else
20288 {
20289 gcc_assert (d->flag);
20290
20291 scratch0 = gen_reg_rtx (tmode0);
20292 scratch1 = gen_reg_rtx (tmode1);
20293
20294 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20295 }
20296
20297 if (! pat)
20298 return 0;
20299
20300 emit_insn (pat);
20301
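/* The remaining pcmpestr variants return a single flag bit; d->flag
   holds the mode of the flags register to test against zero. */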
20302 if (d->flag)
20303 {
20304 target = gen_reg_rtx (SImode);
20305 emit_move_insn (target, const0_rtx);
20306 target = gen_rtx_SUBREG (QImode, target, 0);
20307
20308 emit_insn
20309 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20310 gen_rtx_fmt_ee (EQ, QImode,
20311 gen_rtx_REG ((enum machine_mode) d->flag,
20312 FLAGS_REG),
20313 const0_rtx)));
20314 return SUBREG_REG (target);
20315 }
20316 else
20317 return target;
20318 }
20319
20320
20321 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20322
20323 static rtx
20324 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20325 tree exp, rtx target)
20326 {
20327 rtx pat;
20328 tree arg0 = CALL_EXPR_ARG (exp, 0);
20329 tree arg1 = CALL_EXPR_ARG (exp, 1);
20330 tree arg2 = CALL_EXPR_ARG (exp, 2);
20331 rtx scratch0, scratch1;
20332 rtx op0 = expand_normal (arg0);
20333 rtx op1 = expand_normal (arg1);
20334 rtx op2 = expand_normal (arg2);
20335 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20336
20337 tmode0 = insn_data[d->icode].operand[0].mode;
20338 tmode1 = insn_data[d->icode].operand[1].mode;
20339 modev2 = insn_data[d->icode].operand[2].mode;
20340 modev3 = insn_data[d->icode].operand[3].mode;
20341 modeimm = insn_data[d->icode].operand[4].mode;
20342
20343 if (VECTOR_MODE_P (modev2))
20344 op0 = safe_vector_operand (op0, modev2);
20345 if (VECTOR_MODE_P (modev3))
20346 op1 = safe_vector_operand (op1, modev3);
20347
20348 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20349 op0 = copy_to_mode_reg (modev2, op0);
20350 if ((optimize && !register_operand (op1, modev3))
20351 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20352 op1 = copy_to_mode_reg (modev3, op1);
20353
20354 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20355 {
20356 error ("the third argument must be an 8-bit immediate");
20357 return const0_rtx;
20358 }
20359
20360 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20361 {
20362 if (optimize || !target
20363 || GET_MODE (target) != tmode0
20364 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20365 target = gen_reg_rtx (tmode0);
20366
20367 scratch1 = gen_reg_rtx (tmode1);
20368
20369 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20370 }
20371 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20372 {
20373 if (optimize || !target
20374 || GET_MODE (target) != tmode1
20375 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20376 target = gen_reg_rtx (tmode1);
20377
20378 scratch0 = gen_reg_rtx (tmode0);
20379
20380 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20381 }
20382 else
20383 {
20384 gcc_assert (d->flag);
20385
20386 scratch0 = gen_reg_rtx (tmode0);
20387 scratch1 = gen_reg_rtx (tmode1);
20388
20389 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20390 }
20391
20392 if (! pat)
20393 return 0;
20394
20395 emit_insn (pat);
20396
20397 if (d->flag)
20398 {
20399 target = gen_reg_rtx (SImode);
20400 emit_move_insn (target, const0_rtx);
20401 target = gen_rtx_SUBREG (QImode, target, 0);
20402
20403 emit_insn
20404 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20405 gen_rtx_fmt_ee (EQ, QImode,
20406 gen_rtx_REG ((enum machine_mode) d->flag,
20407 FLAGS_REG),
20408 const0_rtx)));
20409 return SUBREG_REG (target);
20410 }
20411 else
20412 return target;
20413 }
20414
20415 /* Return the integer constant in ARG. Constrain it to be in the range
20416 of the subparts of VEC_TYPE; issue an error if not. */
20417
20418 static int
20419 get_element_number (tree vec_type, tree arg)
20420 {
20421 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20422
20423 if (!host_integerp (arg, 1)
20424 || (elt = tree_low_cst (arg, 1), elt > max))
20425 {
20426 error ("selector must be an integer constant in the range 0..%wi", max);
20427 return 0;
20428 }
20429
20430 return elt;
20431 }
20432
20433 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20434 ix86_expand_vector_init. We DO have language-level syntax for this, in
20435 the form of (type){ init-list }. Except that since we can't place emms
20436 instructions from inside the compiler, we can't allow the use of MMX
20437 registers unless the user explicitly asks for it. So we do *not* define
20438 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20439 we have builtins invoked by mmintrin.h that give us license to emit
20440 these sorts of instructions. */
20441
20442 static rtx
20443 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20444 {
20445 enum machine_mode tmode = TYPE_MODE (type);
20446 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20447 int i, n_elt = GET_MODE_NUNITS (tmode);
20448 rtvec v = rtvec_alloc (n_elt);
20449
20450 gcc_assert (VECTOR_MODE_P (tmode));
20451 gcc_assert (call_expr_nargs (exp) == n_elt);
20452
20453 for (i = 0; i < n_elt; ++i)
20454 {
20455 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20456 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20457 }
20458
20459 if (!target || !register_operand (target, tmode))
20460 target = gen_reg_rtx (tmode);
20461
20462 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20463 return target;
20464 }
20465
20466 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20468 had a language-level syntax for referencing vector elements. */
20469
20470 static rtx
20471 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20472 {
20473 enum machine_mode tmode, mode0;
20474 tree arg0, arg1;
20475 int elt;
20476 rtx op0;
20477
20478 arg0 = CALL_EXPR_ARG (exp, 0);
20479 arg1 = CALL_EXPR_ARG (exp, 1);
20480
20481 op0 = expand_normal (arg0);
20482 elt = get_element_number (TREE_TYPE (arg0), arg1);
20483
20484 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20485 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20486 gcc_assert (VECTOR_MODE_P (mode0));
20487
20488 op0 = force_reg (mode0, op0);
20489
20490 if (optimize || !target || !register_operand (target, tmode))
20491 target = gen_reg_rtx (tmode);
20492
20493 ix86_expand_vector_extract (true, target, op0, elt);
20494
20495 return target;
20496 }
20497
20498 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20500 a language-level syntax for referencing vector elements. */
20501
20502 static rtx
20503 ix86_expand_vec_set_builtin (tree exp)
20504 {
20505 enum machine_mode tmode, mode1;
20506 tree arg0, arg1, arg2;
20507 int elt;
20508 rtx op0, op1, target;
20509
20510 arg0 = CALL_EXPR_ARG (exp, 0);
20511 arg1 = CALL_EXPR_ARG (exp, 1);
20512 arg2 = CALL_EXPR_ARG (exp, 2);
20513
20514 tmode = TYPE_MODE (TREE_TYPE (arg0));
20515 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20516 gcc_assert (VECTOR_MODE_P (tmode));
20517
20518 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20519 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20520 elt = get_element_number (TREE_TYPE (arg0), arg2);
20521
20522 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20523 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20524
20525 op0 = force_reg (tmode, op0);
20526 op1 = force_reg (mode1, op1);
20527
20528 /* OP0 is the source of these builtin functions and shouldn't be
20529 modified. Create a copy, use it and return it as target. */
20530 target = gen_reg_rtx (tmode);
20531 emit_move_insn (target, op0);
20532 ix86_expand_vector_set (true, target, op1, elt);
20533
20534 return target;
20535 }
20536
20537 /* Expand an expression EXP that calls a built-in function,
20538 with result going to TARGET if that's convenient
20539 (and in mode MODE if that's convenient).
20540 SUBTARGET may be used as the target for computing one of EXP's operands.
20541 IGNORE is nonzero if the value is to be ignored. */
20542
20543 static rtx
20544 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20545 enum machine_mode mode ATTRIBUTE_UNUSED,
20546 int ignore ATTRIBUTE_UNUSED)
20547 {
20548 const struct builtin_description *d;
20549 size_t i;
20550 enum insn_code icode;
20551 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20552 tree arg0, arg1, arg2, arg3;
20553 rtx op0, op1, op2, op3, pat;
20554 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20555 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20556
20557 switch (fcode)
20558 {
20559 case IX86_BUILTIN_EMMS:
20560 emit_insn (gen_mmx_emms ());
20561 return 0;
20562
20563 case IX86_BUILTIN_SFENCE:
20564 emit_insn (gen_sse_sfence ());
20565 return 0;
20566
20567 case IX86_BUILTIN_MASKMOVQ:
20568 case IX86_BUILTIN_MASKMOVDQU:
20569 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20570 ? CODE_FOR_mmx_maskmovq
20571 : CODE_FOR_sse2_maskmovdqu);
20572 /* Note the arg order is different from the operand order. */
20573 arg1 = CALL_EXPR_ARG (exp, 0);
20574 arg2 = CALL_EXPR_ARG (exp, 1);
20575 arg0 = CALL_EXPR_ARG (exp, 2);
20576 op0 = expand_normal (arg0);
20577 op1 = expand_normal (arg1);
20578 op2 = expand_normal (arg2);
20579 mode0 = insn_data[icode].operand[0].mode;
20580 mode1 = insn_data[icode].operand[1].mode;
20581 mode2 = insn_data[icode].operand[2].mode;
20582
20583 op0 = force_reg (Pmode, op0);
20584 op0 = gen_rtx_MEM (mode1, op0);
20585
20586 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20587 op0 = copy_to_mode_reg (mode0, op0);
20588 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20589 op1 = copy_to_mode_reg (mode1, op1);
20590 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20591 op2 = copy_to_mode_reg (mode2, op2);
20592 pat = GEN_FCN (icode) (op0, op1, op2);
20593 if (! pat)
20594 return 0;
20595 emit_insn (pat);
20596 return 0;
20597
20598 case IX86_BUILTIN_RSQRTF:
20599 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20600
20601 case IX86_BUILTIN_SQRTSS:
20602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20603 case IX86_BUILTIN_RSQRTSS:
20604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20605 case IX86_BUILTIN_RCPSS:
20606 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20607
20608 case IX86_BUILTIN_LOADUPS:
20609 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20610
20611 case IX86_BUILTIN_STOREUPS:
20612 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20613
20614 case IX86_BUILTIN_LOADHPS:
20615 case IX86_BUILTIN_LOADLPS:
20616 case IX86_BUILTIN_LOADHPD:
20617 case IX86_BUILTIN_LOADLPD:
20618 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20619 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20620 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20621 : CODE_FOR_sse2_loadlpd);
20622 arg0 = CALL_EXPR_ARG (exp, 0);
20623 arg1 = CALL_EXPR_ARG (exp, 1);
20624 op0 = expand_normal (arg0);
20625 op1 = expand_normal (arg1);
20626 tmode = insn_data[icode].operand[0].mode;
20627 mode0 = insn_data[icode].operand[1].mode;
20628 mode1 = insn_data[icode].operand[2].mode;
20629
20630 op0 = force_reg (mode0, op0);
20631 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20632 if (optimize || target == 0
20633 || GET_MODE (target) != tmode
20634 || !register_operand (target, tmode))
20635 target = gen_reg_rtx (tmode);
20636 pat = GEN_FCN (icode) (target, op0, op1);
20637 if (! pat)
20638 return 0;
20639 emit_insn (pat);
20640 return target;
20641
20642 case IX86_BUILTIN_STOREHPS:
20643 case IX86_BUILTIN_STORELPS:
20644 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20645 : CODE_FOR_sse_storelps);
20646 arg0 = CALL_EXPR_ARG (exp, 0);
20647 arg1 = CALL_EXPR_ARG (exp, 1);
20648 op0 = expand_normal (arg0);
20649 op1 = expand_normal (arg1);
20650 mode0 = insn_data[icode].operand[0].mode;
20651 mode1 = insn_data[icode].operand[1].mode;
20652
20653 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20654 op1 = force_reg (mode1, op1);
20655
20656 pat = GEN_FCN (icode) (op0, op1);
20657 if (! pat)
20658 return 0;
20659 emit_insn (pat);
20660 return const0_rtx;
20661
20662 case IX86_BUILTIN_MOVNTPS:
20663 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20664 case IX86_BUILTIN_MOVNTQ:
20665 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20666
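/* ldmxcsr and stmxcsr operate on a memory operand, so go through a
   scratch slot on the stack. */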
20667 case IX86_BUILTIN_LDMXCSR:
20668 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20669 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20670 emit_move_insn (target, op0);
20671 emit_insn (gen_sse_ldmxcsr (target));
20672 return 0;
20673
20674 case IX86_BUILTIN_STMXCSR:
20675 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20676 emit_insn (gen_sse_stmxcsr (target));
20677 return copy_to_mode_reg (SImode, target);
20678
20679 case IX86_BUILTIN_SHUFPS:
20680 case IX86_BUILTIN_SHUFPD:
20681 icode = (fcode == IX86_BUILTIN_SHUFPS
20682 ? CODE_FOR_sse_shufps
20683 : CODE_FOR_sse2_shufpd);
20684 arg0 = CALL_EXPR_ARG (exp, 0);
20685 arg1 = CALL_EXPR_ARG (exp, 1);
20686 arg2 = CALL_EXPR_ARG (exp, 2);
20687 op0 = expand_normal (arg0);
20688 op1 = expand_normal (arg1);
20689 op2 = expand_normal (arg2);
20690 tmode = insn_data[icode].operand[0].mode;
20691 mode0 = insn_data[icode].operand[1].mode;
20692 mode1 = insn_data[icode].operand[2].mode;
20693 mode2 = insn_data[icode].operand[3].mode;
20694
20695 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20696 op0 = copy_to_mode_reg (mode0, op0);
20697 if ((optimize && !register_operand (op1, mode1))
20698 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20699 op1 = copy_to_mode_reg (mode1, op1);
20700 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20701 {
20702 /* @@@ better error message */
20703 error ("mask must be an immediate");
20704 return gen_reg_rtx (tmode);
20705 }
20706 if (optimize || target == 0
20707 || GET_MODE (target) != tmode
20708 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20709 target = gen_reg_rtx (tmode);
20710 pat = GEN_FCN (icode) (target, op0, op1, op2);
20711 if (! pat)
20712 return 0;
20713 emit_insn (pat);
20714 return target;
20715
20716 case IX86_BUILTIN_PSHUFW:
20717 case IX86_BUILTIN_PSHUFD:
20718 case IX86_BUILTIN_PSHUFHW:
20719 case IX86_BUILTIN_PSHUFLW:
20720 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20721 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20722 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20723 : CODE_FOR_mmx_pshufw);
20724 arg0 = CALL_EXPR_ARG (exp, 0);
20725 arg1 = CALL_EXPR_ARG (exp, 1);
20726 op0 = expand_normal (arg0);
20727 op1 = expand_normal (arg1);
20728 tmode = insn_data[icode].operand[0].mode;
20729 mode1 = insn_data[icode].operand[1].mode;
20730 mode2 = insn_data[icode].operand[2].mode;
20731
20732 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20733 op0 = copy_to_mode_reg (mode1, op0);
20734 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20735 {
20736 /* @@@ better error message */
20737 error ("mask must be an immediate");
20738 return const0_rtx;
20739 }
20740 if (target == 0
20741 || GET_MODE (target) != tmode
20742 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20743 target = gen_reg_rtx (tmode);
20744 pat = GEN_FCN (icode) (target, op0, op1);
20745 if (! pat)
20746 return 0;
20747 emit_insn (pat);
20748 return target;
20749
20750 case IX86_BUILTIN_PSLLW128:
20751 case IX86_BUILTIN_PSLLWI128:
20752 icode = CODE_FOR_ashlv8hi3;
20753 goto do_pshift;
20754 case IX86_BUILTIN_PSLLD128:
20755 case IX86_BUILTIN_PSLLDI128:
20756 icode = CODE_FOR_ashlv4si3;
20757 goto do_pshift;
20758 case IX86_BUILTIN_PSLLQ128:
20759 case IX86_BUILTIN_PSLLQI128:
20760 icode = CODE_FOR_ashlv2di3;
20761 goto do_pshift;
20762 case IX86_BUILTIN_PSRAW128:
20763 case IX86_BUILTIN_PSRAWI128:
20764 icode = CODE_FOR_ashrv8hi3;
20765 goto do_pshift;
20766 case IX86_BUILTIN_PSRAD128:
20767 case IX86_BUILTIN_PSRADI128:
20768 icode = CODE_FOR_ashrv4si3;
20769 goto do_pshift;
20770 case IX86_BUILTIN_PSRLW128:
20771 case IX86_BUILTIN_PSRLWI128:
20772 icode = CODE_FOR_lshrv8hi3;
20773 goto do_pshift;
20774 case IX86_BUILTIN_PSRLD128:
20775 case IX86_BUILTIN_PSRLDI128:
20776 icode = CODE_FOR_lshrv4si3;
20777 goto do_pshift;
20778 case IX86_BUILTIN_PSRLQ128:
20779 case IX86_BUILTIN_PSRLQI128:
20780 icode = CODE_FOR_lshrv2di3;
20781
20782 do_pshift:
20783 arg0 = CALL_EXPR_ARG (exp, 0);
20784 arg1 = CALL_EXPR_ARG (exp, 1);
20785 op0 = expand_normal (arg0);
20786 op1 = expand_normal (arg1);
20787
20788 tmode = insn_data[icode].operand[0].mode;
20789 mode1 = insn_data[icode].operand[1].mode;
20790
20791 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20792 op0 = copy_to_reg (op0);
20793
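/* A non-constant count may arrive in a wider (vector or DImode)
   register; the shift patterns want an SImode count, so use its low
   SImode part. */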
20794 if (!CONST_INT_P (op1))
20795 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20796
20797 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20798 op1 = copy_to_reg (op1);
20799
20800 target = gen_reg_rtx (tmode);
20801 pat = GEN_FCN (icode) (target, op0, op1);
20802 if (!pat)
20803 return 0;
20804 emit_insn (pat);
20805 return target;
20806
20807 case IX86_BUILTIN_PSLLDQI128:
20808 case IX86_BUILTIN_PSRLDQI128:
20809 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20810 : CODE_FOR_sse2_lshrti3);
20811 arg0 = CALL_EXPR_ARG (exp, 0);
20812 arg1 = CALL_EXPR_ARG (exp, 1);
20813 op0 = expand_normal (arg0);
20814 op1 = expand_normal (arg1);
20815 tmode = insn_data[icode].operand[0].mode;
20816 mode1 = insn_data[icode].operand[1].mode;
20817 mode2 = insn_data[icode].operand[2].mode;
20818
20819 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20820 {
20821 op0 = copy_to_reg (op0);
20822 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20823 }
20824 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20825 {
20826 error ("shift must be an immediate");
20827 return const0_rtx;
20828 }
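/* The builtin returns a V2DI value, but the ti shift patterns operate
   on TImode; allocate the result in V2DImode and give the insn a
   TImode view of it. */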
20829 target = gen_reg_rtx (V2DImode);
20830 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20831 op0, op1);
20832 if (! pat)
20833 return 0;
20834 emit_insn (pat);
20835 return target;
20836
20837 case IX86_BUILTIN_FEMMS:
20838 emit_insn (gen_mmx_femms ());
20839 return NULL_RTX;
20840
20841 case IX86_BUILTIN_PAVGUSB:
20842 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20843
20844 case IX86_BUILTIN_PF2ID:
20845 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20846
20847 case IX86_BUILTIN_PFACC:
20848 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20849
20850 case IX86_BUILTIN_PFADD:
20851 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20852
20853 case IX86_BUILTIN_PFCMPEQ:
20854 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20855
20856 case IX86_BUILTIN_PFCMPGE:
20857 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20858
20859 case IX86_BUILTIN_PFCMPGT:
20860 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20861
20862 case IX86_BUILTIN_PFMAX:
20863 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20864
20865 case IX86_BUILTIN_PFMIN:
20866 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20867
20868 case IX86_BUILTIN_PFMUL:
20869 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20870
20871 case IX86_BUILTIN_PFRCP:
20872 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20873
20874 case IX86_BUILTIN_PFRCPIT1:
20875 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20876
20877 case IX86_BUILTIN_PFRCPIT2:
20878 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20879
20880 case IX86_BUILTIN_PFRSQIT1:
20881 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20882
20883 case IX86_BUILTIN_PFRSQRT:
20884 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20885
20886 case IX86_BUILTIN_PFSUB:
20887 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20888
20889 case IX86_BUILTIN_PFSUBR:
20890 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20891
20892 case IX86_BUILTIN_PI2FD:
20893 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20894
20895 case IX86_BUILTIN_PMULHRW:
20896 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20897
20898 case IX86_BUILTIN_PF2IW:
20899 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20900
20901 case IX86_BUILTIN_PFNACC:
20902 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20903
20904 case IX86_BUILTIN_PFPNACC:
20905 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20906
20907 case IX86_BUILTIN_PI2FW:
20908 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20909
20910 case IX86_BUILTIN_PSWAPDSI:
20911 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20912
20913 case IX86_BUILTIN_PSWAPDSF:
20914 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20915
20916 case IX86_BUILTIN_SQRTSD:
20917 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20918 case IX86_BUILTIN_LOADUPD:
20919 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20920 case IX86_BUILTIN_STOREUPD:
20921 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20922
20923 case IX86_BUILTIN_MFENCE:
20924 emit_insn (gen_sse2_mfence ());
20925 return 0;
20926 case IX86_BUILTIN_LFENCE:
20927 emit_insn (gen_sse2_lfence ());
20928 return 0;
20929
20930 case IX86_BUILTIN_CLFLUSH:
20931 arg0 = CALL_EXPR_ARG (exp, 0);
20932 op0 = expand_normal (arg0);
20933 icode = CODE_FOR_sse2_clflush;
20934 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20935 op0 = copy_to_mode_reg (Pmode, op0);
20936
20937 emit_insn (gen_sse2_clflush (op0));
20938 return 0;
20939
20940 case IX86_BUILTIN_MOVNTPD:
20941 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20942 case IX86_BUILTIN_MOVNTDQ:
20943 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20944 case IX86_BUILTIN_MOVNTI:
20945 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20946
20947 case IX86_BUILTIN_LOADDQU:
20948 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20949 case IX86_BUILTIN_STOREDQU:
20950 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20951
20952 case IX86_BUILTIN_MONITOR:
20953 arg0 = CALL_EXPR_ARG (exp, 0);
20954 arg1 = CALL_EXPR_ARG (exp, 1);
20955 arg2 = CALL_EXPR_ARG (exp, 2);
20956 op0 = expand_normal (arg0);
20957 op1 = expand_normal (arg1);
20958 op2 = expand_normal (arg2);
20959 if (!REG_P (op0))
20960 op0 = copy_to_mode_reg (Pmode, op0);
20961 if (!REG_P (op1))
20962 op1 = copy_to_mode_reg (SImode, op1);
20963 if (!REG_P (op2))
20964 op2 = copy_to_mode_reg (SImode, op2);
20965 if (!TARGET_64BIT)
20966 emit_insn (gen_sse3_monitor (op0, op1, op2));
20967 else
20968 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
20969 return 0;
20970
20971 case IX86_BUILTIN_MWAIT:
20972 arg0 = CALL_EXPR_ARG (exp, 0);
20973 arg1 = CALL_EXPR_ARG (exp, 1);
20974 op0 = expand_normal (arg0);
20975 op1 = expand_normal (arg1);
20976 if (!REG_P (op0))
20977 op0 = copy_to_mode_reg (SImode, op0);
20978 if (!REG_P (op1))
20979 op1 = copy_to_mode_reg (SImode, op1);
20980 emit_insn (gen_sse3_mwait (op0, op1));
20981 return 0;
20982
20983 case IX86_BUILTIN_LDDQU:
20984 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
20985 target, 1);
20986
20987 case IX86_BUILTIN_PALIGNR:
20988 case IX86_BUILTIN_PALIGNR128:
20989 if (fcode == IX86_BUILTIN_PALIGNR)
20990 {
20991 icode = CODE_FOR_ssse3_palignrdi;
20992 mode = DImode;
20993 }
20994 else
20995 {
20996 icode = CODE_FOR_ssse3_palignrti;
20997 mode = V2DImode;
20998 }
20999 arg0 = CALL_EXPR_ARG (exp, 0);
21000 arg1 = CALL_EXPR_ARG (exp, 1);
21001 arg2 = CALL_EXPR_ARG (exp, 2);
21002 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21003 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21004 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21005 tmode = insn_data[icode].operand[0].mode;
21006 mode1 = insn_data[icode].operand[1].mode;
21007 mode2 = insn_data[icode].operand[2].mode;
21008 mode3 = insn_data[icode].operand[3].mode;
21009
21010 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21011 {
21012 op0 = copy_to_reg (op0);
21013 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21014 }
21015 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21016 {
21017 op1 = copy_to_reg (op1);
21018 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21019 }
21020 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21021 {
21022 error ("shift must be an immediate");
21023 return const0_rtx;
21024 }
21025 target = gen_reg_rtx (mode);
21026 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21027 op0, op1, op2);
21028 if (! pat)
21029 return 0;
21030 emit_insn (pat);
21031 return target;
21032
21033 case IX86_BUILTIN_MOVNTDQA:
21034 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21035 target, 1);
21036
21037 case IX86_BUILTIN_MOVNTSD:
21038 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21039
21040 case IX86_BUILTIN_MOVNTSS:
21041 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21042
21043 case IX86_BUILTIN_INSERTQ:
21044 case IX86_BUILTIN_EXTRQ:
21045 icode = (fcode == IX86_BUILTIN_EXTRQ
21046 ? CODE_FOR_sse4a_extrq
21047 : CODE_FOR_sse4a_insertq);
21048 arg0 = CALL_EXPR_ARG (exp, 0);
21049 arg1 = CALL_EXPR_ARG (exp, 1);
21050 op0 = expand_normal (arg0);
21051 op1 = expand_normal (arg1);
21052 tmode = insn_data[icode].operand[0].mode;
21053 mode1 = insn_data[icode].operand[1].mode;
21054 mode2 = insn_data[icode].operand[2].mode;
21055 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21056 op0 = copy_to_mode_reg (mode1, op0);
21057 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21058 op1 = copy_to_mode_reg (mode2, op1);
21059 if (optimize || target == 0
21060 || GET_MODE (target) != tmode
21061 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21062 target = gen_reg_rtx (tmode);
21063 pat = GEN_FCN (icode) (target, op0, op1);
21064 if (! pat)
21065 return NULL_RTX;
21066 emit_insn (pat);
21067 return target;
21068
21069 case IX86_BUILTIN_EXTRQI:
21070 icode = CODE_FOR_sse4a_extrqi;
21071 arg0 = CALL_EXPR_ARG (exp, 0);
21072 arg1 = CALL_EXPR_ARG (exp, 1);
21073 arg2 = CALL_EXPR_ARG (exp, 2);
21074 op0 = expand_normal (arg0);
21075 op1 = expand_normal (arg1);
21076 op2 = expand_normal (arg2);
21077 tmode = insn_data[icode].operand[0].mode;
21078 mode1 = insn_data[icode].operand[1].mode;
21079 mode2 = insn_data[icode].operand[2].mode;
21080 mode3 = insn_data[icode].operand[3].mode;
21081 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21082 op0 = copy_to_mode_reg (mode1, op0);
21083 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21084 {
21085 error ("index mask must be an immediate");
21086 return gen_reg_rtx (tmode);
21087 }
21088 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21089 {
21090 error ("length mask must be an immediate");
21091 return gen_reg_rtx (tmode);
21092 }
21093 if (optimize || target == 0
21094 || GET_MODE (target) != tmode
21095 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21096 target = gen_reg_rtx (tmode);
21097 pat = GEN_FCN (icode) (target, op0, op1, op2);
21098 if (! pat)
21099 return NULL_RTX;
21100 emit_insn (pat);
21101 return target;
21102
21103 case IX86_BUILTIN_INSERTQI:
21104 icode = CODE_FOR_sse4a_insertqi;
21105 arg0 = CALL_EXPR_ARG (exp, 0);
21106 arg1 = CALL_EXPR_ARG (exp, 1);
21107 arg2 = CALL_EXPR_ARG (exp, 2);
21108 arg3 = CALL_EXPR_ARG (exp, 3);
21109 op0 = expand_normal (arg0);
21110 op1 = expand_normal (arg1);
21111 op2 = expand_normal (arg2);
21112 op3 = expand_normal (arg3);
21113 tmode = insn_data[icode].operand[0].mode;
21114 mode1 = insn_data[icode].operand[1].mode;
21115 mode2 = insn_data[icode].operand[2].mode;
21116 mode3 = insn_data[icode].operand[3].mode;
21117 mode4 = insn_data[icode].operand[4].mode;
21118
21119 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21120 op0 = copy_to_mode_reg (mode1, op0);
21121
21122 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21123 op1 = copy_to_mode_reg (mode2, op1);
21124
21125 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21126 {
21127 error ("index mask must be an immediate");
21128 return gen_reg_rtx (tmode);
21129 }
21130 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21131 {
21132 error ("length mask must be an immediate");
21133 return gen_reg_rtx (tmode);
21134 }
21135 if (optimize || target == 0
21136 || GET_MODE (target) != tmode
21137 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21138 target = gen_reg_rtx (tmode);
21139 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21140 if (! pat)
21141 return NULL_RTX;
21142 emit_insn (pat);
21143 return target;
21144
21145 case IX86_BUILTIN_VEC_INIT_V2SI:
21146 case IX86_BUILTIN_VEC_INIT_V4HI:
21147 case IX86_BUILTIN_VEC_INIT_V8QI:
21148 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21149
21150 case IX86_BUILTIN_VEC_EXT_V2DF:
21151 case IX86_BUILTIN_VEC_EXT_V2DI:
21152 case IX86_BUILTIN_VEC_EXT_V4SF:
21153 case IX86_BUILTIN_VEC_EXT_V4SI:
21154 case IX86_BUILTIN_VEC_EXT_V8HI:
21155 case IX86_BUILTIN_VEC_EXT_V2SI:
21156 case IX86_BUILTIN_VEC_EXT_V4HI:
21157 case IX86_BUILTIN_VEC_EXT_V16QI:
21158 return ix86_expand_vec_ext_builtin (exp, target);
21159
21160 case IX86_BUILTIN_VEC_SET_V2DI:
21161 case IX86_BUILTIN_VEC_SET_V4SF:
21162 case IX86_BUILTIN_VEC_SET_V4SI:
21163 case IX86_BUILTIN_VEC_SET_V8HI:
21164 case IX86_BUILTIN_VEC_SET_V4HI:
21165 case IX86_BUILTIN_VEC_SET_V16QI:
21166 return ix86_expand_vec_set_builtin (exp);
21167
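/* Materialize an infinity of the requested mode via the constant
   pool and load it into the target register. */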
21168 case IX86_BUILTIN_INFQ:
21169 {
21170 REAL_VALUE_TYPE inf;
21171 rtx tmp;
21172
21173 real_inf (&inf);
21174 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21175
21176 tmp = validize_mem (force_const_mem (mode, tmp));
21177
21178 if (target == 0)
21179 target = gen_reg_rtx (mode);
21180
21181 emit_move_insn (target, tmp);
21182 return target;
21183 }
21184
21185 case IX86_BUILTIN_FABSQ:
21186 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21187
21188 case IX86_BUILTIN_COPYSIGNQ:
21189 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21190
21191 default:
21192 break;
21193 }
21194
21195 for (i = 0, d = bdesc_sse_3arg;
21196 i < ARRAY_SIZE (bdesc_sse_3arg);
21197 i++, d++)
21198 if (d->code == fcode)
21199 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21200 target);
21201
21202 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21203 if (d->code == fcode)
21204 {
21205 /* Compares are treated specially. */
21206 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21207 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21208 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21209 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21210 return ix86_expand_sse_compare (d, exp, target);
21211
21212 return ix86_expand_binop_builtin (d->icode, exp, target);
21213 }
21214
21215 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21216 if (d->code == fcode)
21217 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21218
21219 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21220 if (d->code == fcode)
21221 return ix86_expand_sse_comi (d, exp, target);
21222
21223 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21224 if (d->code == fcode)
21225 return ix86_expand_sse_ptest (d, exp, target);
21226
21227 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21228 if (d->code == fcode)
21229 return ix86_expand_crc32 (d->icode, exp, target);
21230
21231 for (i = 0, d = bdesc_pcmpestr;
21232 i < ARRAY_SIZE (bdesc_pcmpestr);
21233 i++, d++)
21234 if (d->code == fcode)
21235 return ix86_expand_sse_pcmpestr (d, exp, target);
21236
21237 for (i = 0, d = bdesc_pcmpistr;
21238 i < ARRAY_SIZE (bdesc_pcmpistr);
21239 i++, d++)
21240 if (d->code == fcode)
21241 return ix86_expand_sse_pcmpistr (d, exp, target);
21242
21243 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21244 if (d->code == fcode)
21245 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21246 (enum multi_arg_type)d->flag,
21247 d->comparison);
21248
21249 gcc_unreachable ();
21250 }
21251
21252 /* Returns a function decl for a vectorized version of the builtin function
21253 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21254 if it is not available. */
21255
21256 static tree
21257 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21258 tree type_in)
21259 {
21260 enum machine_mode in_mode, out_mode;
21261 int in_n, out_n;
21262
21263 if (TREE_CODE (type_out) != VECTOR_TYPE
21264 || TREE_CODE (type_in) != VECTOR_TYPE)
21265 return NULL_TREE;
21266
21267 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21268 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21269 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21270 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21271
21272 switch (fn)
21273 {
21274 case BUILT_IN_SQRT:
21275 if (out_mode == DFmode && out_n == 2
21276 && in_mode == DFmode && in_n == 2)
21277 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21278 break;
21279
21280 case BUILT_IN_SQRTF:
21281 if (out_mode == SFmode && out_n == 4
21282 && in_mode == SFmode && in_n == 4)
21283 return ix86_builtins[IX86_BUILTIN_SQRTPS];
21284 break;
21285
21286 case BUILT_IN_LRINT:
21287 if (out_mode == SImode && out_n == 4
21288 && in_mode == DFmode && in_n == 2)
21289 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21290 break;
21291
21292 case BUILT_IN_LRINTF:
21293 if (out_mode == SImode && out_n == 4
21294 && in_mode == SFmode && in_n == 4)
21295 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21296 break;
21297
21298 default:
21299 ;
21300 }
21301
21302 /* Dispatch to a handler for a vectorization library. */
21303 if (ix86_veclib_handler)
21304 return (*ix86_veclib_handler)(fn, type_out, type_in);
21305
21306 return NULL_TREE;
21307 }
21308
21309 /* Handler for an ACML-style interface to a library with vectorized
21310 intrinsics. */
21311
21312 static tree
21313 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21314 {
21315 char name[20] = "__vr.._";
21316 tree fntype, new_fndecl, args;
21317 unsigned arity;
21318 const char *bname;
21319 enum machine_mode el_mode, in_mode;
21320 int n, in_n;
21321
21322 /* The ACML is 64-bit only and suitable for unsafe math only, as
21323 it does not correctly support parts of IEEE with the required
21324 precision, such as denormals. */
21325 if (!TARGET_64BIT
21326 || !flag_unsafe_math_optimizations)
21327 return NULL_TREE;
21328
21329 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21330 n = TYPE_VECTOR_SUBPARTS (type_out);
21331 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21332 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21333 if (el_mode != in_mode
21334 || n != in_n)
21335 return NULL_TREE;
21336
21337 switch (fn)
21338 {
21339 case BUILT_IN_SIN:
21340 case BUILT_IN_COS:
21341 case BUILT_IN_EXP:
21342 case BUILT_IN_LOG:
21343 case BUILT_IN_LOG2:
21344 case BUILT_IN_LOG10:
21345 name[4] = 'd';
21346 name[5] = '2';
21347 if (el_mode != DFmode
21348 || n != 2)
21349 return NULL_TREE;
21350 break;
21351
21352 case BUILT_IN_SINF:
21353 case BUILT_IN_COSF:
21354 case BUILT_IN_EXPF:
21355 case BUILT_IN_POWF:
21356 case BUILT_IN_LOGF:
21357 case BUILT_IN_LOG2F:
21358 case BUILT_IN_LOG10F:
21359 name[4] = 's';
21360 name[5] = '4';
21361 if (el_mode != SFmode
21362 || n != 4)
21363 return NULL_TREE;
21364 break;
21365
21366 default:
21367 return NULL_TREE;
21368 }
21369
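/* Fill in the template: name[4] selects double ('d') or float ('s'),
   name[5] the vector width, and the suffix is the scalar builtin's
   name with the "__builtin_" prefix stripped, e.g. "__vrd2_sin". */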
21370 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21371 sprintf (name + 7, "%s", bname+10);
21372
21373 arity = 0;
21374 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21375 args = TREE_CHAIN (args))
21376 arity++;
21377
21378 if (arity == 1)
21379 fntype = build_function_type_list (type_out, type_in, NULL);
21380 else
21381 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21382
21383 /* Build a function declaration for the vectorized function. */
21384 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21385 TREE_PUBLIC (new_fndecl) = 1;
21386 DECL_EXTERNAL (new_fndecl) = 1;
21387 DECL_IS_NOVOPS (new_fndecl) = 1;
21388 TREE_READONLY (new_fndecl) = 1;
21389
21390 return new_fndecl;
21391 }
21392
21393
21394 /* Returns a decl of a function that implements conversion of the
21395 input vector of type TYPE, or NULL_TREE if it is not available. */
21396
21397 static tree
21398 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21399 {
21400 if (TREE_CODE (type) != VECTOR_TYPE)
21401 return NULL_TREE;
21402
21403 switch (code)
21404 {
21405 case FLOAT_EXPR:
21406 switch (TYPE_MODE (type))
21407 {
21408 case V4SImode:
21409 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21410 default:
21411 return NULL_TREE;
21412 }
21413
21414 case FIX_TRUNC_EXPR:
21415 switch (TYPE_MODE (type))
21416 {
21417 case V4SFmode:
21418 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21419 default:
21420 return NULL_TREE;
21421 }
21422 default:
21423 return NULL_TREE;
21424
21425 }
21426 }
21427
21428 /* Returns a decl of a target-specific builtin that implements the
21429 reciprocal of the function, or NULL_TREE if it is not available. */
21430
21431 static tree
21432 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21433 bool sqrt ATTRIBUTE_UNUSED)
21434 {
21435 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21436 && flag_finite_math_only && !flag_trapping_math
21437 && flag_unsafe_math_optimizations))
21438 return NULL_TREE;
21439
21440 if (md_fn)
21441 /* Machine dependent builtins. */
21442 switch (fn)
21443 {
21444 /* Vectorized version of sqrt to rsqrt conversion. */
21445 case IX86_BUILTIN_SQRTPS:
21446 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
21447
21448 default:
21449 return NULL_TREE;
21450 }
21451 else
21452 /* Normal builtins. */
21453 switch (fn)
21454 {
21455 /* Sqrt to rsqrt conversion. */
21456 case BUILT_IN_SQRTF:
21457 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21458
21459 default:
21460 return NULL_TREE;
21461 }
21462 }
21463
21464 /* Store OPERAND to the memory after reload is completed. This means
21465 that we can't easily use assign_stack_local. */
21466 rtx
21467 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21468 {
21469 rtx result;
21470
21471 gcc_assert (reload_completed);
21472 if (TARGET_RED_ZONE)
21473 {
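/* Within the red zone we may store below the stack pointer without
   adjusting it, so simply use the slot at sp - RED_ZONE_SIZE. */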
21474 result = gen_rtx_MEM (mode,
21475 gen_rtx_PLUS (Pmode,
21476 stack_pointer_rtx,
21477 GEN_INT (-RED_ZONE_SIZE)));
21478 emit_move_insn (result, operand);
21479 }
21480 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21481 {
21482 switch (mode)
21483 {
21484 case HImode:
21485 case SImode:
21486 operand = gen_lowpart (DImode, operand);
21487 /* FALLTHRU */
21488 case DImode:
21489 emit_insn (
21490 gen_rtx_SET (VOIDmode,
21491 gen_rtx_MEM (DImode,
21492 gen_rtx_PRE_DEC (DImode,
21493 stack_pointer_rtx)),
21494 operand));
21495 break;
21496 default:
21497 gcc_unreachable ();
21498 }
21499 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21500 }
21501 else
21502 {
21503 switch (mode)
21504 {
21505 case DImode:
21506 {
21507 rtx operands[2];
21508 split_di (&operand, 1, operands, operands + 1);
21509 emit_insn (
21510 gen_rtx_SET (VOIDmode,
21511 gen_rtx_MEM (SImode,
21512 gen_rtx_PRE_DEC (Pmode,
21513 stack_pointer_rtx)),
21514 operands[1]));
21515 emit_insn (
21516 gen_rtx_SET (VOIDmode,
21517 gen_rtx_MEM (SImode,
21518 gen_rtx_PRE_DEC (Pmode,
21519 stack_pointer_rtx)),
21520 operands[0]));
21521 }
21522 break;
21523 case HImode:
21524 /* Store HImodes as SImodes. */
21525 operand = gen_lowpart (SImode, operand);
21526 /* FALLTHRU */
21527 case SImode:
21528 emit_insn (
21529 gen_rtx_SET (VOIDmode,
21530 gen_rtx_MEM (GET_MODE (operand),
21531 gen_rtx_PRE_DEC (SImode,
21532 stack_pointer_rtx)),
21533 operand));
21534 break;
21535 default:
21536 gcc_unreachable ();
21537 }
21538 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21539 }
21540 return result;
21541 }
21542
21543 /* Free the operand from memory. */
21544 void
21545 ix86_free_from_memory (enum machine_mode mode)
21546 {
21547 if (!TARGET_RED_ZONE)
21548 {
21549 int size;
21550
21551 if (mode == DImode || TARGET_64BIT)
21552 size = 8;
21553 else
21554 size = 4;
21555 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21556 to a pop or add instruction if registers are available. */
21557 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21558 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21559 GEN_INT (size))));
21560 }
21561 }
21562
21563 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21564 QImode must go into class Q_REGS.
21565 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21566 movdf to do mem-to-mem moves through integer regs. */
21567 enum reg_class
21568 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21569 {
21570 enum machine_mode mode = GET_MODE (x);
21571
21572 /* We're only allowed to return a subclass of CLASS. Many of the
21573 following checks fail for NO_REGS, so eliminate that early. */
21574 if (regclass == NO_REGS)
21575 return NO_REGS;
21576
21577 /* All classes can load zeros. */
21578 if (x == CONST0_RTX (mode))
21579 return regclass;
21580
21581 /* Force constants into memory if we are loading a (nonzero) constant into
21582 an MMX or SSE register. This is because there are no MMX/SSE instructions
21583 to load from a constant. */
21584 if (CONSTANT_P (x)
21585 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21586 return NO_REGS;
21587
21588 /* Prefer SSE regs only, if we can use them for math. */
21589 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21590 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21591
21592 /* Floating-point constants need more complex checks. */
21593 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21594 {
21595 /* General regs can load everything. */
21596 if (reg_class_subset_p (regclass, GENERAL_REGS))
21597 return regclass;
21598
21599 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21600 zero above. We only want to wind up preferring 80387 registers if
21601 we plan on doing computation with them. */
21602 if (TARGET_80387
21603 && standard_80387_constant_p (x))
21604 {
21605 /* Limit class to non-sse. */
21606 if (regclass == FLOAT_SSE_REGS)
21607 return FLOAT_REGS;
21608 if (regclass == FP_TOP_SSE_REGS)
21609 return FP_TOP_REG;
21610 if (regclass == FP_SECOND_SSE_REGS)
21611 return FP_SECOND_REG;
21612 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21613 return regclass;
21614 }
21615
21616 return NO_REGS;
21617 }
21618
21619 /* Generally when we see PLUS here, it's the function invariant
21620 (plus soft-fp const_int), which can only be computed into general
21621 regs. */
21622 if (GET_CODE (x) == PLUS)
21623 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21624
21625 /* QImode constants are easy to load, but non-constant QImode data
21626 must go into Q_REGS. */
21627 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21628 {
21629 if (reg_class_subset_p (regclass, Q_REGS))
21630 return regclass;
21631 if (reg_class_subset_p (Q_REGS, regclass))
21632 return Q_REGS;
21633 return NO_REGS;
21634 }
21635
21636 return regclass;
21637 }
21638
21639 /* Discourage putting floating-point values in SSE registers unless
21640 SSE math is being used, and likewise for the 387 registers. */
21641 enum reg_class
21642 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21643 {
21644 enum machine_mode mode = GET_MODE (x);
21645
21646 /* Restrict the output reload class to the register bank that we are doing
21647 math on. If we would like not to return a subset of CLASS, reject this
21648 alternative: if reload cannot do this, it will still use its choice. */
21649 mode = GET_MODE (x);
21650 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21651 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21652
21653 if (X87_FLOAT_MODE_P (mode))
21654 {
21655 if (regclass == FP_TOP_SSE_REGS)
21656 return FP_TOP_REG;
21657 else if (regclass == FP_SECOND_SSE_REGS)
21658 return FP_SECOND_REG;
21659 else
21660 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21661 }
21662
21663 return regclass;
21664 }
21665
21666 /* If we are copying between general and FP registers, we need a memory
21667 location. The same is true for SSE and MMX registers.
21668
21669 To optimize register_move_cost performance, allow inline variant.
21670
21671 The macro can't work reliably when one of the CLASSES is a class containing
21672 registers from multiple units (SSE, MMX, integer). We avoid this by never
21673 combining those units in a single alternative in the machine description.
21674 Ensure that this constraint holds to avoid unexpected surprises.
21675
21676 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21677 enforce these sanity checks. */
21678
21679 static inline int
21680 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21681 enum machine_mode mode, int strict)
21682 {
21683 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21684 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21685 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21686 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21687 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21688 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21689 {
21690 gcc_assert (!strict);
21691 return true;
21692 }
21693
21694 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21695 return true;
21696
21697 /* ??? This is a lie. We do have moves between mmx/general, and for
21698 mmx/sse2. But by saying we need secondary memory we discourage the
21699 register allocator from using the mmx registers unless needed. */
21700 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21701 return true;
21702
21703 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21704 {
21705 /* SSE1 doesn't have any direct moves from other classes. */
21706 if (!TARGET_SSE2)
21707 return true;
21708
21709 /* If the target says that inter-unit moves are more expensive
21710 than moving through memory, then don't generate them. */
21711 if (!TARGET_INTER_UNIT_MOVES)
21712 return true;
21713
21714 /* Between SSE and general, we have moves no larger than word size. */
21715 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21716 return true;
21717 }
21718
21719 return false;
21720 }
21721
21722 int
21723 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21724 enum machine_mode mode, int strict)
21725 {
21726 return inline_secondary_memory_needed (class1, class2, mode, strict);
21727 }
21728
21729 /* Return true if the registers in CLASS cannot represent the change from
21730 modes FROM to TO. */
21731
21732 bool
21733 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21734 enum reg_class regclass)
21735 {
21736 if (from == to)
21737 return false;
21738
21739 /* x87 registers can't do subreg at all, as all values are reformatted
21740 to extended precision. */
21741 if (MAYBE_FLOAT_CLASS_P (regclass))
21742 return true;
21743
21744 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21745 {
21746 /* Vector registers do not support QI or HImode loads. If we don't
21747 disallow a change to these modes, reload will assume it's ok to
21748 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21749 the vec_dupv4hi pattern. */
21750 if (GET_MODE_SIZE (from) < 4)
21751 return true;
21752
21753 /* Vector registers do not support subreg with nonzero offsets, which
21754 are otherwise valid for integer registers. Since we can't see
21755 whether we have a nonzero offset from here, prohibit all
21756 nonparadoxical subregs changing size. */
21757 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21758 return true;
21759 }
21760
21761 return false;
21762 }
21763
21764 /* Return the cost of moving data of mode M between a
21765 register and memory. A value of 2 is the default; this cost is
21766 relative to those in `REGISTER_MOVE_COST'.
21767
21768 This function is used extensively by register_move_cost, which is used to
21769 build tables at startup. Make it inline in this case.
21770 When IN is 2, return the maximum of the in and out move costs.
21771
21772 If moving between registers and memory is more expensive than
21773 between two registers, you should define this macro to express the
21774 relative cost.
21775
21776 Also model the increased cost of moving QImode registers in non
21777 Q_REGS classes.
21778 */
21779 static inline int
21780 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21781 int in)
21782 {
21783 int cost;
21784 if (FLOAT_CLASS_P (regclass))
21785 {
21786 int index;
21787 switch (mode)
21788 {
21789 case SFmode:
21790 index = 0;
21791 break;
21792 case DFmode:
21793 index = 1;
21794 break;
21795 case XFmode:
21796 index = 2;
21797 break;
21798 default:
21799 return 100;
21800 }
21801 if (in == 2)
21802 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21803 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21804 }
21805 if (SSE_CLASS_P (regclass))
21806 {
21807 int index;
21808 switch (GET_MODE_SIZE (mode))
21809 {
21810 case 4:
21811 index = 0;
21812 break;
21813 case 8:
21814 index = 1;
21815 break;
21816 case 16:
21817 index = 2;
21818 break;
21819 default:
21820 return 100;
21821 }
21822 if (in == 2)
21823 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21824 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21825 }
21826 if (MMX_CLASS_P (regclass))
21827 {
21828 int index;
21829 switch (GET_MODE_SIZE (mode))
21830 {
21831 case 4:
21832 index = 0;
21833 break;
21834 case 8:
21835 index = 1;
21836 break;
21837 default:
21838 return 100;
21839 }
21840 if (in == 2)
21841 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21842 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21843 }
21844 switch (GET_MODE_SIZE (mode))
21845 {
21846 case 1:
21847 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21848 {
21849 if (!in)
21850 return ix86_cost->int_store[0];
21851 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21852 cost = ix86_cost->movzbl_load;
21853 else
21854 cost = ix86_cost->int_load[0];
21855 if (in == 2)
21856 return MAX (cost, ix86_cost->int_store[0]);
21857 return cost;
21858 }
21859 else
21860 {
21861 if (in == 2)
21862 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21863 if (in)
21864 return ix86_cost->movzbl_load;
21865 else
21866 return ix86_cost->int_store[0] + 4;
21867 }
21868 break;
21869 case 2:
21870 if (in == 2)
21871 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21872 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21873 default:
21874 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21875 if (mode == TFmode)
21876 mode = XFmode;
21877 if (in == 2)
21878 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21879 else if (in)
21880 cost = ix86_cost->int_load[2];
21881 else
21882 cost = ix86_cost->int_store[2];
21883 return (cost * (((int) GET_MODE_SIZE (mode)
21884 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21885 }
21886 }
21887
21888 int
21889 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21890 {
21891 return inline_memory_move_cost (mode, regclass, in);
21892 }
21893
21894
21895 /* Return the cost of moving data from a register in class CLASS1 to
21896 one in class CLASS2.
21897
21898 It is not required that the cost always equal 2 when FROM is the same as TO;
21899 on some machines it is expensive to move between registers if they are not
21900 general registers. */
21901
21902 int
21903 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21904 enum reg_class class2)
21905 {
21906 /* In case we require secondary memory, compute cost of the store followed
21907 by load. In order to avoid bad register allocation choices, we need
21908 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21909
21910 if (inline_secondary_memory_needed (class1, class2, mode, 0))
21911 {
21912 int cost = 1;
21913
21914 cost += inline_memory_move_cost (mode, class1, 2);
21915 cost += inline_memory_move_cost (mode, class2, 2);
21916
21917 /* When copying from a general purpose register we may emit multiple
21918 stores followed by a single load, causing a memory size mismatch stall.
21919 Count this as an arbitrarily high cost of 20. */
21920 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21921 cost += 20;
21922
21923 /* In the case of FP/MMX moves, the registers actually overlap, and we
21924 have to switch modes in order to treat them differently. */
21925 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21926 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21927 cost += 20;
21928
21929 return cost;
21930 }
21931
21932 /* Moves between SSE/MMX and integer unit are expensive. */
21933 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21934 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21935
21936 /* ??? By keeping returned value relatively high, we limit the number
21937 of moves between integer and MMX/SSE registers for all targets.
21938 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
21939 where integer modes in MMX/SSE registers are not tieable
21940 because of missing QImode and HImode moves to, from or between
21941 MMX/SSE registers. */
21942 return MAX (ix86_cost->mmxsse_to_integer, 8);
21943
21944 if (MAYBE_FLOAT_CLASS_P (class1))
21945 return ix86_cost->fp_move;
21946 if (MAYBE_SSE_CLASS_P (class1))
21947 return ix86_cost->sse_move;
21948 if (MAYBE_MMX_CLASS_P (class1))
21949 return ix86_cost->mmx_move;
21950 return 2;
21951 }
21952
21953 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21954
21955 bool
21956 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21957 {
21958 /* Flags and only flags can only hold CCmode values. */
21959 if (CC_REGNO_P (regno))
21960 return GET_MODE_CLASS (mode) == MODE_CC;
21961 if (GET_MODE_CLASS (mode) == MODE_CC
21962 || GET_MODE_CLASS (mode) == MODE_RANDOM
21963 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21964 return 0;
21965 if (FP_REGNO_P (regno))
21966 return VALID_FP_MODE_P (mode);
21967 if (SSE_REGNO_P (regno))
21968 {
21969 /* We implement the move patterns for all vector modes into and
21970 out of SSE registers, even when no operation instructions
21971 are available. */
21972 return (VALID_SSE_REG_MODE (mode)
21973 || VALID_SSE2_REG_MODE (mode)
21974 || VALID_MMX_REG_MODE (mode)
21975 || VALID_MMX_REG_MODE_3DNOW (mode));
21976 }
21977 if (MMX_REGNO_P (regno))
21978 {
21979 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21980 so if the register is available at all, then we can move data of
21981 the given mode into or out of it. */
21982 return (VALID_MMX_REG_MODE (mode)
21983 || VALID_MMX_REG_MODE_3DNOW (mode));
21984 }
21985
21986 if (mode == QImode)
21987 {
21988 /* Take care with QImode values - they can be in non-QI regs,
21989 but then they do cause partial register stalls. */
21990 if (regno < 4 || TARGET_64BIT)
21991 return 1;
21992 if (!TARGET_PARTIAL_REG_STALL)
21993 return 1;
21994 return reload_in_progress || reload_completed;
21995 }
21996 /* We handle both integer and floats in the general purpose registers. */
21997 else if (VALID_INT_MODE_P (mode))
21998 return 1;
21999 else if (VALID_FP_MODE_P (mode))
22000 return 1;
22001 else if (VALID_DFP_MODE_P (mode))
22002 return 1;
22003 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22004 on to use that value in smaller contexts, this can easily force a
22005 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22006 supporting DImode, allow it. */
22007 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22008 return 1;
22009
22010 return 0;
22011 }
22012
22013 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22014 tieable integer mode. */
22015
22016 static bool
22017 ix86_tieable_integer_mode_p (enum machine_mode mode)
22018 {
22019 switch (mode)
22020 {
22021 case HImode:
22022 case SImode:
22023 return true;
22024
22025 case QImode:
22026 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22027
22028 case DImode:
22029 return TARGET_64BIT;
22030
22031 default:
22032 return false;
22033 }
22034 }
22035
22036 /* Return true if MODE1 is accessible in a register that can hold MODE2
22037 without copying. That is, all register classes that can hold MODE2
22038 can also hold MODE1. */
22039
22040 bool
22041 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22042 {
22043 if (mode1 == mode2)
22044 return true;
22045
22046 if (ix86_tieable_integer_mode_p (mode1)
22047 && ix86_tieable_integer_mode_p (mode2))
22048 return true;
22049
22050 /* MODE2 being XFmode implies fp stack or general regs, which means we
22051 can tie any smaller floating point modes to it. Note that we do not
22052 tie this with TFmode. */
22053 if (mode2 == XFmode)
22054 return mode1 == SFmode || mode1 == DFmode;
22055
22056 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22057 that we can tie it with SFmode. */
22058 if (mode2 == DFmode)
22059 return mode1 == SFmode;
22060
22061 /* If MODE2 is only appropriate for an SSE register, then tie with
22062 any other mode acceptable to SSE registers. */
22063 if (GET_MODE_SIZE (mode2) == 16
22064 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22065 return (GET_MODE_SIZE (mode1) == 16
22066 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22067
22068 /* If MODE2 is appropriate for an MMX register, then tie
22069 with any other mode acceptable to MMX registers. */
22070 if (GET_MODE_SIZE (mode2) == 8
22071 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22072 return (GET_MODE_SIZE (mode1) == 8
22073 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22074
22075 return false;
22076 }
22077
22078 /* Compute a (partial) cost for rtx X. Return true if the complete
22079 cost has been computed, and false if subexpressions should be
22080 scanned. In either case, *TOTAL contains the cost result. */
22081
22082 static bool
22083 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22084 {
22085 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22086 enum machine_mode mode = GET_MODE (x);
22087
22088 switch (code)
22089 {
22090 case CONST_INT:
22091 case CONST:
22092 case LABEL_REF:
22093 case SYMBOL_REF:
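/* Immediates that need a full 64-bit move are the most expensive,
   those that fit only when sign-extended come next, PIC symbolic
   references that need adjustment cost 1, and everything else is
   free as an immediate operand.  */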
22094 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22095 *total = 3;
22096 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22097 *total = 2;
22098 else if (flag_pic && SYMBOLIC_CONST (x)
22099 && (!TARGET_64BIT
22100 || (GET_CODE (x) != LABEL_REF
22101 && (GET_CODE (x) != SYMBOL_REF
22102 || !SYMBOL_REF_LOCAL_P (x)))))
22103 *total = 1;
22104 else
22105 *total = 0;
22106 return true;
22107
22108 case CONST_DOUBLE:
22109 if (mode == VOIDmode)
22110 *total = 0;
22111 else
22112 switch (standard_80387_constant_p (x))
22113 {
22114 case 1: /* 0.0 */
22115 *total = 1;
22116 break;
22117 default: /* Other constants */
22118 *total = 2;
22119 break;
22120 case 0:
22121 case -1:
22122 /* Start with (MEM (SYMBOL_REF)), since that's where
22123 it'll probably end up. Add a penalty for size. */
22124 *total = (COSTS_N_INSNS (1)
22125 + (flag_pic != 0 && !TARGET_64BIT)
22126 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22127 break;
22128 }
22129 return true;
22130
22131 case ZERO_EXTEND:
22132 /* Zero extension is often completely free on x86_64, so make
22133 it as cheap as possible. */
22134 if (TARGET_64BIT && mode == DImode
22135 && GET_MODE (XEXP (x, 0)) == SImode)
22136 *total = 1;
22137 else if (TARGET_ZERO_EXTEND_WITH_AND)
22138 *total = ix86_cost->add;
22139 else
22140 *total = ix86_cost->movzx;
22141 return false;
22142
22143 case SIGN_EXTEND:
22144 *total = ix86_cost->movsx;
22145 return false;
22146
22147 case ASHIFT:
22148 if (CONST_INT_P (XEXP (x, 1))
22149 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22150 {
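/* A shift by 1 can be done with an add, and shifts by 2 or 3 may
   be cheaper as an lea.  */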
22151 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22152 if (value == 1)
22153 {
22154 *total = ix86_cost->add;
22155 return false;
22156 }
22157 if ((value == 2 || value == 3)
22158 && ix86_cost->lea <= ix86_cost->shift_const)
22159 {
22160 *total = ix86_cost->lea;
22161 return false;
22162 }
22163 }
22164 /* FALLTHRU */
22165
22166 case ROTATE:
22167 case ASHIFTRT:
22168 case LSHIFTRT:
22169 case ROTATERT:
22170 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22171 {
22172 if (CONST_INT_P (XEXP (x, 1)))
22173 {
22174 if (INTVAL (XEXP (x, 1)) > 32)
22175 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22176 else
22177 *total = ix86_cost->shift_const * 2;
22178 }
22179 else
22180 {
22181 if (GET_CODE (XEXP (x, 1)) == AND)
22182 *total = ix86_cost->shift_var * 2;
22183 else
22184 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22185 }
22186 }
22187 else
22188 {
22189 if (CONST_INT_P (XEXP (x, 1)))
22190 *total = ix86_cost->shift_const;
22191 else
22192 *total = ix86_cost->shift_var;
22193 }
22194 return false;
22195
22196 case MULT:
22197 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22198 {
22199 /* ??? SSE scalar cost should be used here. */
22200 *total = ix86_cost->fmul;
22201 return false;
22202 }
22203 else if (X87_FLOAT_MODE_P (mode))
22204 {
22205 *total = ix86_cost->fmul;
22206 return false;
22207 }
22208 else if (FLOAT_MODE_P (mode))
22209 {
22210 /* ??? SSE vector cost should be used here. */
22211 *total = ix86_cost->fmul;
22212 return false;
22213 }
22214 else
22215 {
22216 rtx op0 = XEXP (x, 0);
22217 rtx op1 = XEXP (x, 1);
22218 int nbits;
22219 if (CONST_INT_P (XEXP (x, 1)))
22220 {
22221 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22222 for (nbits = 0; value != 0; value &= value - 1)
22223 nbits++;
22224 }
22225 else
22226 /* This is arbitrary. */
22227 nbits = 7;
22228
22229 /* Compute costs correctly for widening multiplication. */
22230 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22231 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22232 == GET_MODE_SIZE (mode))
22233 {
22234 int is_mulwiden = 0;
22235 enum machine_mode inner_mode = GET_MODE (op0);
22236
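/* If both operands widen from the half-width mode (or the constant
   operand fits that mode), cost the multiply in the narrower mode.  */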
22237 if (GET_CODE (op0) == GET_CODE (op1))
22238 is_mulwiden = 1, op1 = XEXP (op1, 0);
22239 else if (CONST_INT_P (op1))
22240 {
22241 if (GET_CODE (op0) == SIGN_EXTEND)
22242 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22243 == INTVAL (op1);
22244 else
22245 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22246 }
22247
22248 if (is_mulwiden)
22249 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22250 }
22251
22252 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22253 + nbits * ix86_cost->mult_bit
22254 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22255
22256 return true;
22257 }
22258
22259 case DIV:
22260 case UDIV:
22261 case MOD:
22262 case UMOD:
22263 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22264 /* ??? SSE cost should be used here. */
22265 *total = ix86_cost->fdiv;
22266 else if (X87_FLOAT_MODE_P (mode))
22267 *total = ix86_cost->fdiv;
22268 else if (FLOAT_MODE_P (mode))
22269 /* ??? SSE vector cost should be used here. */
22270 *total = ix86_cost->fdiv;
22271 else
22272 *total = ix86_cost->divide[MODE_INDEX (mode)];
22273 return false;
22274
22275 case PLUS:
22276 if (GET_MODE_CLASS (mode) == MODE_INT
22277 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22278 {
22279 if (GET_CODE (XEXP (x, 0)) == PLUS
22280 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22281 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22282 && CONSTANT_P (XEXP (x, 1)))
22283 {
22284 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22285 if (val == 2 || val == 4 || val == 8)
22286 {
22287 *total = ix86_cost->lea;
22288 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22289 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22290 outer_code);
22291 *total += rtx_cost (XEXP (x, 1), outer_code);
22292 return true;
22293 }
22294 }
22295 else if (GET_CODE (XEXP (x, 0)) == MULT
22296 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22297 {
22298 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22299 if (val == 2 || val == 4 || val == 8)
22300 {
22301 *total = ix86_cost->lea;
22302 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22303 *total += rtx_cost (XEXP (x, 1), outer_code);
22304 return true;
22305 }
22306 }
22307 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22308 {
22309 *total = ix86_cost->lea;
22310 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22311 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22312 *total += rtx_cost (XEXP (x, 1), outer_code);
22313 return true;
22314 }
22315 }
22316 /* FALLTHRU */
22317
22318 case MINUS:
22319 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22320 {
22321 /* ??? SSE cost should be used here. */
22322 *total = ix86_cost->fadd;
22323 return false;
22324 }
22325 else if (X87_FLOAT_MODE_P (mode))
22326 {
22327 *total = ix86_cost->fadd;
22328 return false;
22329 }
22330 else if (FLOAT_MODE_P (mode))
22331 {
22332 /* ??? SSE vector cost should be used here. */
22333 *total = ix86_cost->fadd;
22334 return false;
22335 }
22336 /* FALLTHRU */
22337
22338 case AND:
22339 case IOR:
22340 case XOR:
22341 if (!TARGET_64BIT && mode == DImode)
22342 {
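/* Without 64-bit support a DImode logical operation is split into
   a pair of SImode instructions, so charge two add costs plus the
   operand costs.  */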
22343 *total = (ix86_cost->add * 2
22344 + (rtx_cost (XEXP (x, 0), outer_code)
22345 << (GET_MODE (XEXP (x, 0)) != DImode))
22346 + (rtx_cost (XEXP (x, 1), outer_code)
22347 << (GET_MODE (XEXP (x, 1)) != DImode)));
22348 return true;
22349 }
22350 /* FALLTHRU */
22351
22352 case NEG:
22353 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22354 {
22355 /* ??? SSE cost should be used here. */
22356 *total = ix86_cost->fchs;
22357 return false;
22358 }
22359 else if (X87_FLOAT_MODE_P (mode))
22360 {
22361 *total = ix86_cost->fchs;
22362 return false;
22363 }
22364 else if (FLOAT_MODE_P (mode))
22365 {
22366 /* ??? SSE vector cost should be used here. */
22367 *total = ix86_cost->fchs;
22368 return false;
22369 }
22370 /* FALLTHRU */
22371
22372 case NOT:
22373 if (!TARGET_64BIT && mode == DImode)
22374 *total = ix86_cost->add * 2;
22375 else
22376 *total = ix86_cost->add;
22377 return false;
22378
22379 case COMPARE:
22380 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22381 && XEXP (XEXP (x, 0), 1) == const1_rtx
22382 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22383 && XEXP (x, 1) == const0_rtx)
22384 {
22385 /* This kind of construct is implemented using test[bwl].
22386 Treat it as if we had an AND. */
22387 *total = (ix86_cost->add
22388 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22389 + rtx_cost (const1_rtx, outer_code));
22390 return true;
22391 }
22392 return false;
22393
22394 case FLOAT_EXTEND:
22395 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22396 *total = 0;
22397 return false;
22398
22399 case ABS:
22400 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22401 /* ??? SSE cost should be used here. */
22402 *total = ix86_cost->fabs;
22403 else if (X87_FLOAT_MODE_P (mode))
22404 *total = ix86_cost->fabs;
22405 else if (FLOAT_MODE_P (mode))
22406 /* ??? SSE vector cost should be used here. */
22407 *total = ix86_cost->fabs;
22408 return false;
22409
22410 case SQRT:
22411 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22412 /* ??? SSE cost should be used here. */
22413 *total = ix86_cost->fsqrt;
22414 else if (X87_FLOAT_MODE_P (mode))
22415 *total = ix86_cost->fsqrt;
22416 else if (FLOAT_MODE_P (mode))
22417 /* ??? SSE vector cost should be used here. */
22418 *total = ix86_cost->fsqrt;
22419 return false;
22420
22421 case UNSPEC:
22422 if (XINT (x, 1) == UNSPEC_TP)
22423 *total = 0;
22424 return false;
22425
22426 default:
22427 return false;
22428 }
22429 }
22430
22431 #if TARGET_MACHO
22432
22433 static int current_machopic_label_num;
22434
22435 /* Given a symbol name and its associated stub, write out the
22436 definition of the stub. */
22437
22438 void
22439 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22440 {
22441 unsigned int length;
22442 char *binder_name, *symbol_name, lazy_ptr_name[32];
22443 int label = ++current_machopic_label_num;
22444
22445 /* For 64-bit we shouldn't get here. */
22446 gcc_assert (!TARGET_64BIT);
22447
22448 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22449 symb = (*targetm.strip_name_encoding) (symb);
22450
22451 length = strlen (stub);
22452 binder_name = alloca (length + 32);
22453 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22454
22455 length = strlen (symb);
22456 symbol_name = alloca (length + 32);
22457 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22458
22459 sprintf (lazy_ptr_name, "L%d$lz", label);
22460
22461 if (MACHOPIC_PURE)
22462 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22463 else
22464 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22465
22466 fprintf (file, "%s:\n", stub);
22467 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22468
22469 if (MACHOPIC_PURE)
22470 {
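/* Position-independent stub: materialize the PC with a call/pop
   pair, then load the lazy pointer PC-relatively and jump through
   it.  */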
22471 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22472 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22473 fprintf (file, "\tjmp\t*%%edx\n");
22474 }
22475 else
22476 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22477
22478 fprintf (file, "%s:\n", binder_name);
22479
22480 if (MACHOPIC_PURE)
22481 {
22482 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22483 fprintf (file, "\tpushl\t%%eax\n");
22484 }
22485 else
22486 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22487
22488 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22489
22490 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22491 fprintf (file, "%s:\n", lazy_ptr_name);
22492 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22493 fprintf (file, "\t.long %s\n", binder_name);
22494 }
22495
22496 void
22497 darwin_x86_file_end (void)
22498 {
22499 darwin_file_end ();
22500 ix86_file_end ();
22501 }
22502 #endif /* TARGET_MACHO */
22503
22504 /* Order the registers for register allocator. */
22505
22506 void
22507 x86_order_regs_for_local_alloc (void)
22508 {
22509 int pos = 0;
22510 int i;
22511
22512 /* First allocate the local general purpose registers. */
22513 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22514 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22515 reg_alloc_order [pos++] = i;
22516
22517 /* Global general purpose registers. */
22518 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22519 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22520 reg_alloc_order [pos++] = i;
22521
22522 /* x87 registers come first in case we are doing FP math
22523 using them. */
22524 if (!TARGET_SSE_MATH)
22525 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22526 reg_alloc_order [pos++] = i;
22527
22528 /* SSE registers. */
22529 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22530 reg_alloc_order [pos++] = i;
22531 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22532 reg_alloc_order [pos++] = i;
22533
22534 /* x87 registers. */
22535 if (TARGET_SSE_MATH)
22536 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22537 reg_alloc_order [pos++] = i;
22538
22539 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22540 reg_alloc_order [pos++] = i;
22541
22542 /* Initialize the rest of the array, as we do not allocate some registers
22543 at all. */
22544 while (pos < FIRST_PSEUDO_REGISTER)
22545 reg_alloc_order [pos++] = 0;
22546 }
22547
22548 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22549 struct attribute_spec.handler. */
22550 static tree
22551 ix86_handle_struct_attribute (tree *node, tree name,
22552 tree args ATTRIBUTE_UNUSED,
22553 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22554 {
22555 tree *type = NULL;
22556 if (DECL_P (*node))
22557 {
22558 if (TREE_CODE (*node) == TYPE_DECL)
22559 type = &TREE_TYPE (*node);
22560 }
22561 else
22562 type = node;
22563
22564 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22565 || TREE_CODE (*type) == UNION_TYPE)))
22566 {
22567 warning (OPT_Wattributes, "%qs attribute ignored",
22568 IDENTIFIER_POINTER (name));
22569 *no_add_attrs = true;
22570 }
22571
22572 else if ((is_attribute_p ("ms_struct", name)
22573 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22574 || ((is_attribute_p ("gcc_struct", name)
22575 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22576 {
22577 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22578 IDENTIFIER_POINTER (name));
22579 *no_add_attrs = true;
22580 }
22581
22582 return NULL_TREE;
22583 }
22584
22585 static bool
22586 ix86_ms_bitfield_layout_p (const_tree record_type)
22587 {
22588 return (TARGET_MS_BITFIELD_LAYOUT &&
22589 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22590 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22591 }
22592
22593 /* Returns an expression indicating where the this parameter is
22594 located on entry to the FUNCTION. */
22595
22596 static rtx
22597 x86_this_parameter (tree function)
22598 {
22599 tree type = TREE_TYPE (function);
22600 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22601
22602 if (TARGET_64BIT)
22603 {
22604 const int *parm_regs;
22605
22606 if (TARGET_64BIT_MS_ABI)
22607 parm_regs = x86_64_ms_abi_int_parameter_registers;
22608 else
22609 parm_regs = x86_64_int_parameter_registers;
22610 return gen_rtx_REG (DImode, parm_regs[aggr]);
22611 }
22612
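/* For 32-bit regparm functions the incoming this pointer arrives in
   a register: ECX for fastcall, EAX otherwise.  */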
22613 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22614 {
22615 int regno = AX_REG;
22616 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22617 regno = CX_REG;
22618 return gen_rtx_REG (SImode, regno);
22619 }
22620
22621 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22622 }
22623
22624 /* Determine whether x86_output_mi_thunk can succeed. */
22625
22626 static bool
22627 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22628 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22629 HOST_WIDE_INT vcall_offset, const_tree function)
22630 {
22631 /* 64-bit can handle anything. */
22632 if (TARGET_64BIT)
22633 return true;
22634
22635 /* For 32-bit, everything's fine if we have one free register. */
22636 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22637 return true;
22638
22639 /* Need a free register for vcall_offset. */
22640 if (vcall_offset)
22641 return false;
22642
22643 /* Need a free register for GOT references. */
22644 if (flag_pic && !(*targetm.binds_local_p) (function))
22645 return false;
22646
22647 /* Otherwise ok. */
22648 return true;
22649 }
22650
22651 /* Output the assembler code for a thunk function. THUNK_DECL is the
22652 declaration for the thunk function itself, FUNCTION is the decl for
22653 the target function. DELTA is an immediate constant offset to be
22654 added to THIS. If VCALL_OFFSET is nonzero, the word at
22655 *(*this + vcall_offset) should be added to THIS. */
22656
22657 static void
22658 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22659 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22660 HOST_WIDE_INT vcall_offset, tree function)
22661 {
22662 rtx xops[3];
22663 rtx this_param = x86_this_parameter (function);
22664 rtx this_reg, tmp;
22665
22666 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22667 pull it in now and let DELTA benefit. */
22668 if (REG_P (this_param))
22669 this_reg = this_param;
22670 else if (vcall_offset)
22671 {
22672 /* Put the this parameter into %eax. */
22673 xops[0] = this_param;
22674 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22675 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22676 }
22677 else
22678 this_reg = NULL_RTX;
22679
22680 /* Adjust the this parameter by a fixed constant. */
22681 if (delta)
22682 {
22683 xops[0] = GEN_INT (delta);
22684 xops[1] = this_reg ? this_reg : this_param;
22685 if (TARGET_64BIT)
22686 {
22687 if (!x86_64_general_operand (xops[0], DImode))
22688 {
22689 tmp = gen_rtx_REG (DImode, R10_REG);
22690 xops[1] = tmp;
22691 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22692 xops[0] = tmp;
22693 xops[1] = this_param;
22694 }
22695 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22696 }
22697 else
22698 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22699 }
22700
22701 /* Adjust the this parameter by a value stored in the vtable. */
22702 if (vcall_offset)
22703 {
22704 if (TARGET_64BIT)
22705 tmp = gen_rtx_REG (DImode, R10_REG);
22706 else
22707 {
22708 int tmp_regno = CX_REG;
22709 if (lookup_attribute ("fastcall",
22710 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22711 tmp_regno = AX_REG;
22712 tmp = gen_rtx_REG (SImode, tmp_regno);
22713 }
22714
22715 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22716 xops[1] = tmp;
22717 if (TARGET_64BIT)
22718 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22719 else
22720 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22721
22722 /* Adjust the this parameter. */
22723 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22724 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22725 {
22726 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22727 xops[0] = GEN_INT (vcall_offset);
22728 xops[1] = tmp2;
22729 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22730 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22731 }
22732 xops[1] = this_reg;
22733 if (TARGET_64BIT)
22734 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22735 else
22736 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22737 }
22738
22739 /* If necessary, drop THIS back to its stack slot. */
22740 if (this_reg && this_reg != this_param)
22741 {
22742 xops[0] = this_reg;
22743 xops[1] = this_param;
22744 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22745 }
22746
22747 xops[0] = XEXP (DECL_RTL (function), 0);
22748 if (TARGET_64BIT)
22749 {
22750 if (!flag_pic || (*targetm.binds_local_p) (function))
22751 output_asm_insn ("jmp\t%P0", xops);
22752 /* All thunks should be in the same object as their target,
22753 and thus binds_local_p should be true. */
22754 else if (TARGET_64BIT_MS_ABI)
22755 gcc_unreachable ();
22756 else
22757 {
22758 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22759 tmp = gen_rtx_CONST (Pmode, tmp);
22760 tmp = gen_rtx_MEM (QImode, tmp);
22761 xops[0] = tmp;
22762 output_asm_insn ("jmp\t%A0", xops);
22763 }
22764 }
22765 else
22766 {
22767 if (!flag_pic || (*targetm.binds_local_p) (function))
22768 output_asm_insn ("jmp\t%P0", xops);
22769 else
22770 #if TARGET_MACHO
22771 if (TARGET_MACHO)
22772 {
22773 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22774 tmp = (gen_rtx_SYMBOL_REF
22775 (Pmode,
22776 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22777 tmp = gen_rtx_MEM (QImode, tmp);
22778 xops[0] = tmp;
22779 output_asm_insn ("jmp\t%0", xops);
22780 }
22781 else
22782 #endif /* TARGET_MACHO */
22783 {
22784 tmp = gen_rtx_REG (SImode, CX_REG);
22785 output_set_got (tmp, NULL_RTX);
22786
22787 xops[1] = tmp;
22788 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22789 output_asm_insn ("jmp\t{*}%1", xops);
22790 }
22791 }
22792 }
22793
22794 static void
22795 x86_file_start (void)
22796 {
22797 default_file_start ();
22798 #if TARGET_MACHO
22799 darwin_file_start ();
22800 #endif
22801 if (X86_FILE_START_VERSION_DIRECTIVE)
22802 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22803 if (X86_FILE_START_FLTUSED)
22804 fputs ("\t.global\t__fltused\n", asm_out_file);
22805 if (ix86_asm_dialect == ASM_INTEL)
22806 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22807 }
22808
22809 int
22810 x86_field_alignment (tree field, int computed)
22811 {
22812 enum machine_mode mode;
22813 tree type = TREE_TYPE (field);
22814
22815 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22816 return computed;
22817 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22818 ? get_inner_array_type (type) : type);
22819 if (mode == DFmode || mode == DCmode
22820 || GET_MODE_CLASS (mode) == MODE_INT
22821 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22822 return MIN (32, computed);
22823 return computed;
22824 }
22825
22826 /* Output assembler code to FILE to increment profiler label # LABELNO
22827 for profiling a function entry. */
22828 void
22829 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22830 {
22831 if (TARGET_64BIT)
22832 {
22833 #ifndef NO_PROFILE_COUNTERS
22834 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
22835 #endif
22836
22837 if (!TARGET_64BIT_MS_ABI && flag_pic)
22838 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22839 else
22840 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22841 }
22842 else if (flag_pic)
22843 {
22844 #ifndef NO_PROFILE_COUNTERS
22845 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22846 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22847 #endif
22848 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22849 }
22850 else
22851 {
22852 #ifndef NO_PROFILE_COUNTERS
22853 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22854 PROFILE_COUNT_REGISTER);
22855 #endif
22856 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22857 }
22858 }
22859
22860 /* We don't have exact information about the insn sizes, but we may assume
22861 quite safely that we are informed about all 1 byte insns and memory
22862 address sizes. This is enough to eliminate unnecessary padding in
22863 99% of cases. */
22864
22865 static int
22866 min_insn_size (rtx insn)
22867 {
22868 int l = 0;
22869
22870 if (!INSN_P (insn) || !active_insn_p (insn))
22871 return 0;
22872
22873 /* Discard alignments we've emitted and jump instructions. */
22874 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22875 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22876 return 0;
22877 if (JUMP_P (insn)
22878 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22879 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22880 return 0;
22881
22882 /* Important case - calls are always 5 bytes.
22883 It is common to have many calls in a row. */
22884 if (CALL_P (insn)
22885 && symbolic_reference_mentioned_p (PATTERN (insn))
22886 && !SIBLING_CALL_P (insn))
22887 return 5;
22888 if (get_attr_length (insn) <= 1)
22889 return 1;
22890
22891 /* For normal instructions we may rely on the sizes of addresses
22892 and the presence of a symbol to require 4 bytes of encoding. This is not the
22893 This is not the case for jumps where references are PC relative. */
22894 if (!JUMP_P (insn))
22895 {
22896 l = get_attr_length_address (insn);
22897 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22898 l = 4;
22899 }
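/* One opcode byte plus the address bytes we could determine;
   otherwise assume a typical two byte instruction.  */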
22900 if (l)
22901 return 1+l;
22902 else
22903 return 2;
22904 }
22905
22906 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
22907 window. */
22908
22909 static void
22910 ix86_avoid_jump_misspredicts (void)
22911 {
22912 rtx insn, start = get_insns ();
22913 int nbytes = 0, njumps = 0;
22914 int isjump = 0;
22915
22916 /* Look for all minimal intervals of instructions containing 4 jumps.
22917 The intervals are bounded by START and INSN. NBYTES is the total
22918 size of instructions in the interval including INSN and not including
22919 START. When NBYTES is smaller than 16 bytes, it is possible
22920 that the ends of START and INSN fall in the same 16 byte page.
22921
22922 The smallest offset in the page INSN can start is the case where START
22923 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22924 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22925 */
22926 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22927 {
22928
22929 nbytes += min_insn_size (insn);
22930 if (dump_file)
22931 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22932 INSN_UID (insn), min_insn_size (insn));
22933 if ((JUMP_P (insn)
22934 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22935 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22936 || CALL_P (insn))
22937 njumps++;
22938 else
22939 continue;
22940
22941 while (njumps > 3)
22942 {
22943 start = NEXT_INSN (start);
22944 if ((JUMP_P (start)
22945 && GET_CODE (PATTERN (start)) != ADDR_VEC
22946 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22947 || CALL_P (start))
22948 njumps--, isjump = 1;
22949 else
22950 isjump = 0;
22951 nbytes -= min_insn_size (start);
22952 }
22953 gcc_assert (njumps >= 0);
22954 if (dump_file)
22955 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22956 INSN_UID (start), INSN_UID (insn), nbytes);
22957
22958 if (njumps == 3 && isjump && nbytes < 16)
22959 {
22960 int padsize = 15 - nbytes + min_insn_size (insn);
22961
22962 if (dump_file)
22963 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22964 INSN_UID (insn), padsize);
22965 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
22966 }
22967 }
22968 }
22969
22970 /* AMD Athlon works faster
22971 when RET is not the destination of a conditional jump or directly preceded
22972 by another jump instruction. We avoid the penalty by inserting a NOP just
22973 before the RET instructions in such cases. */
22974 static void
22975 ix86_pad_returns (void)
22976 {
22977 edge e;
22978 edge_iterator ei;
22979
22980 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
22981 {
22982 basic_block bb = e->src;
22983 rtx ret = BB_END (bb);
22984 rtx prev;
22985 bool replace = false;
22986
22987 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
22988 || !maybe_hot_bb_p (bb))
22989 continue;
22990 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22991 if (active_insn_p (prev) || LABEL_P (prev))
22992 break;
22993 if (prev && LABEL_P (prev))
22994 {
22995 edge e;
22996 edge_iterator ei;
22997
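/* The RET is a jump target; pad it if any executed predecessor
   edge reaches the label by a jump rather than by falling through.  */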
22998 FOR_EACH_EDGE (e, ei, bb->preds)
22999 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23000 && !(e->flags & EDGE_FALLTHRU))
23001 replace = true;
23002 }
23003 if (!replace)
23004 {
23005 prev = prev_active_insn (ret);
23006 if (prev
23007 && ((JUMP_P (prev) && any_condjump_p (prev))
23008 || CALL_P (prev)))
23009 replace = true;
23010 /* Empty functions get a branch mispredict even when the jump destination
23011 is not visible to us. */
23012 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23013 replace = true;
23014 }
23015 if (replace)
23016 {
23017 emit_insn_before (gen_return_internal_long (), ret);
23018 delete_insn (ret);
23019 }
23020 }
23021 }
23022
23023 /* Implement machine specific optimizations. We implement padding of returns
23024 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
23025 static void
23026 ix86_reorg (void)
23027 {
23028 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23029 ix86_pad_returns ();
23030 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23031 ix86_avoid_jump_misspredicts ();
23032 }
23033
23034 /* Return nonzero when a QImode register that must be represented via a REX
23035 prefix is used. */
23036 bool
23037 x86_extended_QIreg_mentioned_p (rtx insn)
23038 {
23039 int i;
23040 extract_insn_cached (insn);
23041 for (i = 0; i < recog_data.n_operands; i++)
23042 if (REG_P (recog_data.operand[i])
23043 && REGNO (recog_data.operand[i]) >= 4)
23044 return true;
23045 return false;
23046 }
23047
23048 /* Return nonzero when P points to a register encoded via a REX prefix.
23049 Called via for_each_rtx. */
23050 static int
23051 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23052 {
23053 unsigned int regno;
23054 if (!REG_P (*p))
23055 return 0;
23056 regno = REGNO (*p);
23057 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23058 }
23059
23060 /* Return true when INSN mentions a register that must be encoded using a REX
23061 prefix. */
23062 bool
23063 x86_extended_reg_mentioned_p (rtx insn)
23064 {
23065 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23066 }
23067
23068 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23069 optabs would emit if we didn't have TFmode patterns. */
23070
23071 void
23072 x86_emit_floatuns (rtx operands[2])
23073 {
23074 rtx neglab, donelab, i0, i1, f0, in, out;
23075 enum machine_mode mode, inmode;
23076
23077 inmode = GET_MODE (operands[1]);
23078 gcc_assert (inmode == SImode || inmode == DImode);
23079
23080 out = operands[0];
23081 in = force_reg (inmode, operands[1]);
23082 mode = GET_MODE (out);
23083 neglab = gen_label_rtx ();
23084 donelab = gen_label_rtx ();
23085 f0 = gen_reg_rtx (mode);
23086
23087 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23088
23089 expand_float (out, in, 0);
23090
23091 emit_jump_insn (gen_jump (donelab));
23092 emit_barrier ();
23093
23094 emit_label (neglab);
23095
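/* The input has its high bit set, so a signed conversion would be
   wrong.  Halve the value, OR the discarded low bit back in so that
   rounding is preserved, convert, and then double the result.  */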
23096 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23097 1, OPTAB_DIRECT);
23098 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23099 1, OPTAB_DIRECT);
23100 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23101
23102 expand_float (f0, i0, 0);
23103
23104 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23105
23106 emit_label (donelab);
23107 }
23108 \f
23109 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23110 with all elements equal to VAR. Return true if successful. */
23111
23112 static bool
23113 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23114 rtx target, rtx val)
23115 {
23116 enum machine_mode smode, wsmode, wvmode;
23117 rtx x;
23118
23119 switch (mode)
23120 {
23121 case V2SImode:
23122 case V2SFmode:
23123 if (!mmx_ok)
23124 return false;
23125 /* FALLTHRU */
23126
23127 case V2DFmode:
23128 case V2DImode:
23129 case V4SFmode:
23130 case V4SImode:
23131 val = force_reg (GET_MODE_INNER (mode), val);
23132 x = gen_rtx_VEC_DUPLICATE (mode, val);
23133 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23134 return true;
23135
23136 case V4HImode:
23137 if (!mmx_ok)
23138 return false;
23139 if (TARGET_SSE || TARGET_3DNOW_A)
23140 {
23141 val = gen_lowpart (SImode, val);
23142 x = gen_rtx_TRUNCATE (HImode, val);
23143 x = gen_rtx_VEC_DUPLICATE (mode, x);
23144 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23145 return true;
23146 }
23147 else
23148 {
23149 smode = HImode;
23150 wsmode = SImode;
23151 wvmode = V2SImode;
23152 goto widen;
23153 }
23154
23155 case V8QImode:
23156 if (!mmx_ok)
23157 return false;
23158 smode = QImode;
23159 wsmode = HImode;
23160 wvmode = V4HImode;
23161 goto widen;
23162 case V8HImode:
23163 if (TARGET_SSE2)
23164 {
23165 rtx tmp1, tmp2;
23166 /* Extend HImode to SImode using a paradoxical SUBREG. */
23167 tmp1 = gen_reg_rtx (SImode);
23168 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23169 /* Insert the SImode value as low element of V4SImode vector. */
23170 tmp2 = gen_reg_rtx (V4SImode);
23171 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23172 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23173 CONST0_RTX (V4SImode),
23174 const1_rtx);
23175 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23176 /* Cast the V4SImode vector back to a V8HImode vector. */
23177 tmp1 = gen_reg_rtx (V8HImode);
23178 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23179 /* Duplicate the low short through the whole low SImode word. */
23180 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23181 /* Cast the V8HImode vector back to a V4SImode vector. */
23182 tmp2 = gen_reg_rtx (V4SImode);
23183 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23184 /* Replicate the low element of the V4SImode vector. */
23185 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23186 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23187 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23188 return true;
23189 }
23190 smode = HImode;
23191 wsmode = SImode;
23192 wvmode = V4SImode;
23193 goto widen;
23194 case V16QImode:
23195 if (TARGET_SSE2)
23196 {
23197 rtx tmp1, tmp2;
23198 /* Extend QImode to SImode using a paradoxical SUBREG. */
23199 tmp1 = gen_reg_rtx (SImode);
23200 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23201 /* Insert the SImode value as low element of V4SImode vector. */
23202 tmp2 = gen_reg_rtx (V4SImode);
23203 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23204 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23205 CONST0_RTX (V4SImode),
23206 const1_rtx);
23207 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23208 /* Cast the V4SImode vector back to a V16QImode vector. */
23209 tmp1 = gen_reg_rtx (V16QImode);
23210 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23211 /* Duplicate the low byte through the whole low SImode word. */
23212 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23213 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23214 /* Cast the V16QImode vector back to a V4SImode vector. */
23215 tmp2 = gen_reg_rtx (V4SImode);
23216 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23217 /* Replicate the low element of the V4SImode vector. */
23218 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23219 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23220 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23221 return true;
23222 }
23223 smode = QImode;
23224 wsmode = HImode;
23225 wvmode = V8HImode;
23226 goto widen;
23227 widen:
23228 /* Replicate the value once into the next wider mode and recurse. */
23229 val = convert_modes (wsmode, smode, val, true);
23230 x = expand_simple_binop (wsmode, ASHIFT, val,
23231 GEN_INT (GET_MODE_BITSIZE (smode)),
23232 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23233 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23234
23235 x = gen_reg_rtx (wvmode);
23236 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23237 gcc_unreachable ();
23238 emit_move_insn (target, gen_lowpart (mode, x));
23239 return true;
23240
23241 default:
23242 return false;
23243 }
23244 }
23245
23246 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23247 whose ONE_VAR element is VAR, and other elements are zero. Return true
23248 if successful. */
23249
23250 static bool
23251 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23252 rtx target, rtx var, int one_var)
23253 {
23254 enum machine_mode vsimode;
23255 rtx new_target;
23256 rtx x, tmp;
23257
23258 switch (mode)
23259 {
23260 case V2SFmode:
23261 case V2SImode:
23262 if (!mmx_ok)
23263 return false;
23264 /* FALLTHRU */
23265
23266 case V2DFmode:
23267 case V2DImode:
23268 if (one_var != 0)
23269 return false;
23270 var = force_reg (GET_MODE_INNER (mode), var);
23271 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23272 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23273 return true;
23274
23275 case V4SFmode:
23276 case V4SImode:
23277 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23278 new_target = gen_reg_rtx (mode);
23279 else
23280 new_target = target;
23281 var = force_reg (GET_MODE_INNER (mode), var);
23282 x = gen_rtx_VEC_DUPLICATE (mode, var);
23283 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23284 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23285 if (one_var != 0)
23286 {
23287 /* We need to shuffle the value to the correct position, so
23288 create a new pseudo to store the intermediate result. */
23289
23290 /* With SSE2, we can use the integer shuffle insns. */
23291 if (mode != V4SFmode && TARGET_SSE2)
23292 {
23293 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23294 GEN_INT (1),
23295 GEN_INT (one_var == 1 ? 0 : 1),
23296 GEN_INT (one_var == 2 ? 0 : 1),
23297 GEN_INT (one_var == 3 ? 0 : 1)));
23298 if (target != new_target)
23299 emit_move_insn (target, new_target);
23300 return true;
23301 }
23302
23303 /* Otherwise convert the intermediate result to V4SFmode and
23304 use the SSE1 shuffle instructions. */
23305 if (mode != V4SFmode)
23306 {
23307 tmp = gen_reg_rtx (V4SFmode);
23308 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23309 }
23310 else
23311 tmp = new_target;
23312
23313 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23314 GEN_INT (1),
23315 GEN_INT (one_var == 1 ? 0 : 1),
23316 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23317 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23318
23319 if (mode != V4SFmode)
23320 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23321 else if (tmp != target)
23322 emit_move_insn (target, tmp);
23323 }
23324 else if (target != new_target)
23325 emit_move_insn (target, new_target);
23326 return true;
23327
23328 case V8HImode:
23329 case V16QImode:
23330 vsimode = V4SImode;
23331 goto widen;
23332 case V4HImode:
23333 case V8QImode:
23334 if (!mmx_ok)
23335 return false;
23336 vsimode = V2SImode;
23337 goto widen;
23338 widen:
23339 if (one_var != 0)
23340 return false;
23341
23342 /* Zero extend the variable element to SImode and recurse. */
23343 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23344
23345 x = gen_reg_rtx (vsimode);
23346 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23347 var, one_var))
23348 gcc_unreachable ();
23349
23350 emit_move_insn (target, gen_lowpart (mode, x));
23351 return true;
23352
23353 default:
23354 return false;
23355 }
23356 }
23357
23358 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23359 consisting of the values in VALS. It is known that all elements
23360 except ONE_VAR are constants. Return true if successful. */
23361
23362 static bool
23363 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23364 rtx target, rtx vals, int one_var)
23365 {
23366 rtx var = XVECEXP (vals, 0, one_var);
23367 enum machine_mode wmode;
23368 rtx const_vec, x;
23369
23370 const_vec = copy_rtx (vals);
23371 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23372 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23373
23374 switch (mode)
23375 {
23376 case V2DFmode:
23377 case V2DImode:
23378 case V2SFmode:
23379 case V2SImode:
23380 /* For the two element vectors, it's just as easy to use
23381 the general case. */
23382 return false;
23383
23384 case V4SFmode:
23385 case V4SImode:
23386 case V8HImode:
23387 case V4HImode:
23388 break;
23389
23390 case V16QImode:
23391 wmode = V8HImode;
23392 goto widen;
23393 case V8QImode:
23394 wmode = V4HImode;
23395 goto widen;
23396 widen:
23397 /* There's no way to set one QImode entry easily. Combine
23398 the variable value with its adjacent constant value, and
23399 promote to an HImode set. */
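/* For example, with one_var == 5 the variable byte is paired with the
   constant byte at index 4: the combined HImode value is
   (var << 8) | (vals[4] & 0xff), and it is inserted as element 2
   (one_var >> 1) of the wider vector.  */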
23400 x = XVECEXP (vals, 0, one_var ^ 1);
23401 if (one_var & 1)
23402 {
23403 var = convert_modes (HImode, QImode, var, true);
23404 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23405 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23406 x = GEN_INT (INTVAL (x) & 0xff);
23407 }
23408 else
23409 {
23410 var = convert_modes (HImode, QImode, var, true);
23411 x = gen_int_mode (INTVAL (x) << 8, HImode);
23412 }
23413 if (x != const0_rtx)
23414 var = expand_simple_binop (HImode, IOR, var, x, var,
23415 1, OPTAB_LIB_WIDEN);
23416
23417 x = gen_reg_rtx (wmode);
23418 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23419 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23420
23421 emit_move_insn (target, gen_lowpart (mode, x));
23422 return true;
23423
23424 default:
23425 return false;
23426 }
23427
23428 emit_move_insn (target, const_vec);
23429 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23430 return true;
23431 }
23432
23433 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23434 all values variable, and none identical. */
23435
23436 static void
23437 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23438 rtx target, rtx vals)
23439 {
23440 enum machine_mode half_mode = GET_MODE_INNER (mode);
23441 rtx op0 = NULL, op1 = NULL;
23442 bool use_vec_concat = false;
23443
23444 switch (mode)
23445 {
23446 case V2SFmode:
23447 case V2SImode:
23448 if (!mmx_ok && !TARGET_SSE)
23449 break;
23450 /* FALLTHRU */
23451
23452 case V2DFmode:
23453 case V2DImode:
23454 /* For the two element vectors, we always implement VEC_CONCAT. */
23455 op0 = XVECEXP (vals, 0, 0);
23456 op1 = XVECEXP (vals, 0, 1);
23457 use_vec_concat = true;
23458 break;
23459
23460 case V4SFmode:
23461 half_mode = V2SFmode;
23462 goto half;
23463 case V4SImode:
23464 half_mode = V2SImode;
23465 goto half;
23466 half:
23467 {
23468 rtvec v;
23469
23470 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23471 Recurse to load the two halves. */
23472
23473 op0 = gen_reg_rtx (half_mode);
23474 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23475 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23476
23477 op1 = gen_reg_rtx (half_mode);
23478 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23479 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23480
23481 use_vec_concat = true;
23482 }
23483 break;
23484
23485 case V8HImode:
23486 case V16QImode:
23487 case V4HImode:
23488 case V8QImode:
23489 break;
23490
23491 default:
23492 gcc_unreachable ();
23493 }
23494
23495 if (use_vec_concat)
23496 {
23497 if (!register_operand (op0, half_mode))
23498 op0 = force_reg (half_mode, op0);
23499 if (!register_operand (op1, half_mode))
23500 op1 = force_reg (half_mode, op1);
23501
23502 emit_insn (gen_rtx_SET (VOIDmode, target,
23503 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23504 }
23505 else
23506 {
23507 int i, j, n_elts, n_words, n_elt_per_word;
23508 enum machine_mode inner_mode;
23509 rtx words[4], shift;
23510
23511 inner_mode = GET_MODE_INNER (mode);
23512 n_elts = GET_MODE_NUNITS (mode);
23513 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23514 n_elt_per_word = n_elts / n_words;
23515 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23516
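/* Assemble each word-sized chunk by shifting in the elements from last
   to first, so that element 0 of each chunk ends up in the least
   significant bits, matching the little-endian element layout.  */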
23517 for (i = 0; i < n_words; ++i)
23518 {
23519 rtx word = NULL_RTX;
23520
23521 for (j = 0; j < n_elt_per_word; ++j)
23522 {
23523 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23524 elt = convert_modes (word_mode, inner_mode, elt, true);
23525
23526 if (j == 0)
23527 word = elt;
23528 else
23529 {
23530 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23531 word, 1, OPTAB_LIB_WIDEN);
23532 word = expand_simple_binop (word_mode, IOR, word, elt,
23533 word, 1, OPTAB_LIB_WIDEN);
23534 }
23535 }
23536
23537 words[i] = word;
23538 }
23539
23540 if (n_words == 1)
23541 emit_move_insn (target, gen_lowpart (mode, words[0]));
23542 else if (n_words == 2)
23543 {
23544 rtx tmp = gen_reg_rtx (mode);
23545 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23546 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23547 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23548 emit_move_insn (target, tmp);
23549 }
23550 else if (n_words == 4)
23551 {
23552 rtx tmp = gen_reg_rtx (V4SImode);
23553 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23554 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23555 emit_move_insn (target, gen_lowpart (mode, tmp));
23556 }
23557 else
23558 gcc_unreachable ();
23559 }
23560 }
23561
23562 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23563 instructions unless MMX_OK is true. */
23564
23565 void
23566 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23567 {
23568 enum machine_mode mode = GET_MODE (target);
23569 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23570 int n_elts = GET_MODE_NUNITS (mode);
23571 int n_var = 0, one_var = -1;
23572 bool all_same = true, all_const_zero = true;
23573 int i;
23574 rtx x;
23575
23576 for (i = 0; i < n_elts; ++i)
23577 {
23578 x = XVECEXP (vals, 0, i);
23579 if (!CONSTANT_P (x))
23580 n_var++, one_var = i;
23581 else if (x != CONST0_RTX (inner_mode))
23582 all_const_zero = false;
23583 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23584 all_same = false;
23585 }
23586
23587 /* Constants are best loaded from the constant pool. */
23588 if (n_var == 0)
23589 {
23590 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23591 return;
23592 }
23593
23594 /* If all values are identical, broadcast the value. */
23595 if (all_same
23596 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23597 XVECEXP (vals, 0, 0)))
23598 return;
23599
23600 /* Values where only one field is non-constant are best loaded from
23601 the pool and overwritten via move later. */
23602 if (n_var == 1)
23603 {
23604 if (all_const_zero
23605 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23606 XVECEXP (vals, 0, one_var),
23607 one_var))
23608 return;
23609
23610 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23611 return;
23612 }
23613
23614 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23615 }
23616
23617 void
23618 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23619 {
23620 enum machine_mode mode = GET_MODE (target);
23621 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23622 bool use_vec_merge = false;
23623 rtx tmp;
23624
23625 switch (mode)
23626 {
23627 case V2SFmode:
23628 case V2SImode:
23629 if (mmx_ok)
23630 {
23631 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23632 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23633 if (elt == 0)
23634 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23635 else
23636 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23637 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23638 return;
23639 }
23640 break;
23641
23642 case V2DImode:
23643 use_vec_merge = TARGET_SSE4_1;
23644 if (use_vec_merge)
23645 break;
23646
23647 case V2DFmode:
23648 {
23649 rtx op0, op1;
23650
23651 /* For the two element vectors, we implement a VEC_CONCAT with
23652 the extraction of the other element. */
23653
23654 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23655 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23656
23657 if (elt == 0)
23658 op0 = val, op1 = tmp;
23659 else
23660 op0 = tmp, op1 = val;
23661
23662 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23663 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23664 }
23665 return;
23666
23667 case V4SFmode:
23668 use_vec_merge = TARGET_SSE4_1;
23669 if (use_vec_merge)
23670 break;
23671
23672 switch (elt)
23673 {
23674 case 0:
23675 use_vec_merge = true;
23676 break;
23677
23678 case 1:
23679 /* tmp = target = A B C D */
23680 tmp = copy_to_reg (target);
23681 /* target = A A B B */
23682 emit_insn (gen_sse_unpcklps (target, target, target));
23683 /* target = X A B B */
23684 ix86_expand_vector_set (false, target, val, 0);
23685 /* target = A X C D */
23686 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23687 GEN_INT (1), GEN_INT (0),
23688 GEN_INT (2+4), GEN_INT (3+4)));
23689 return;
23690
23691 case 2:
23692 /* tmp = target = A B C D */
23693 tmp = copy_to_reg (target);
23694 /* tmp = X B C D */
23695 ix86_expand_vector_set (false, tmp, val, 0);
23696 /* target = A B X D */
23697 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23698 GEN_INT (0), GEN_INT (1),
23699 GEN_INT (0+4), GEN_INT (3+4)));
23700 return;
23701
23702 case 3:
23703 /* tmp = target = A B C D */
23704 tmp = copy_to_reg (target);
23705 /* tmp = X B C D */
23706 ix86_expand_vector_set (false, tmp, val, 0);
23707 /* target = A B C X */
23708 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23709 GEN_INT (0), GEN_INT (1),
23710 GEN_INT (2+4), GEN_INT (0+4)));
23711 return;
23712
23713 default:
23714 gcc_unreachable ();
23715 }
23716 break;
23717
23718 case V4SImode:
23719 use_vec_merge = TARGET_SSE4_1;
23720 if (use_vec_merge)
23721 break;
23722
23723 /* Element 0 handled by vec_merge below. */
23724 if (elt == 0)
23725 {
23726 use_vec_merge = true;
23727 break;
23728 }
23729
23730 if (TARGET_SSE2)
23731 {
23732 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23733 store into element 0, then shuffle them back. */
23734
23735 rtx order[4];
23736
23737 order[0] = GEN_INT (elt);
23738 order[1] = const1_rtx;
23739 order[2] = const2_rtx;
23740 order[3] = GEN_INT (3);
23741 order[elt] = const0_rtx;
23742
23743 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23744 order[1], order[2], order[3]));
23745
23746 ix86_expand_vector_set (false, target, val, 0);
23747
23748 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23749 order[1], order[2], order[3]));
23750 }
23751 else
23752 {
23753 /* For SSE1, we have to reuse the V4SF code. */
23754 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23755 gen_lowpart (SFmode, val), elt);
23756 }
23757 return;
23758
23759 case V8HImode:
23760 use_vec_merge = TARGET_SSE2;
23761 break;
23762 case V4HImode:
23763 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23764 break;
23765
23766 case V16QImode:
23767 use_vec_merge = TARGET_SSE4_1;
23768 break;
23769
23770 case V8QImode:
23771 default:
23772 break;
23773 }
23774
23775 if (use_vec_merge)
23776 {
23777 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23778 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23779 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23780 }
23781 else
23782 {
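/* No insn pattern applies; go through memory instead: spill the vector
   to a stack temporary, store the new element into it, and reload the
   whole vector.  */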
23783 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23784
23785 emit_move_insn (mem, target);
23786
23787 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23788 emit_move_insn (tmp, val);
23789
23790 emit_move_insn (target, mem);
23791 }
23792 }
23793
23794 void
23795 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23796 {
23797 enum machine_mode mode = GET_MODE (vec);
23798 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23799 bool use_vec_extr = false;
23800 rtx tmp;
23801
23802 switch (mode)
23803 {
23804 case V2SImode:
23805 case V2SFmode:
23806 if (!mmx_ok)
23807 break;
23808 /* FALLTHRU */
23809
23810 case V2DFmode:
23811 case V2DImode:
23812 use_vec_extr = true;
23813 break;
23814
23815 case V4SFmode:
23816 use_vec_extr = TARGET_SSE4_1;
23817 if (use_vec_extr)
23818 break;
23819
23820 switch (elt)
23821 {
23822 case 0:
23823 tmp = vec;
23824 break;
23825
23826 case 1:
23827 case 3:
23828 tmp = gen_reg_rtx (mode);
23829 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23830 GEN_INT (elt), GEN_INT (elt),
23831 GEN_INT (elt+4), GEN_INT (elt+4)));
23832 break;
23833
23834 case 2:
23835 tmp = gen_reg_rtx (mode);
23836 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23837 break;
23838
23839 default:
23840 gcc_unreachable ();
23841 }
23842 vec = tmp;
23843 use_vec_extr = true;
23844 elt = 0;
23845 break;
23846
23847 case V4SImode:
23848 use_vec_extr = TARGET_SSE4_1;
23849 if (use_vec_extr)
23850 break;
23851
23852 if (TARGET_SSE2)
23853 {
23854 switch (elt)
23855 {
23856 case 0:
23857 tmp = vec;
23858 break;
23859
23860 case 1:
23861 case 3:
23862 tmp = gen_reg_rtx (mode);
23863 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23864 GEN_INT (elt), GEN_INT (elt),
23865 GEN_INT (elt), GEN_INT (elt)));
23866 break;
23867
23868 case 2:
23869 tmp = gen_reg_rtx (mode);
23870 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23871 break;
23872
23873 default:
23874 gcc_unreachable ();
23875 }
23876 vec = tmp;
23877 use_vec_extr = true;
23878 elt = 0;
23879 }
23880 else
23881 {
23882 /* For SSE1, we have to reuse the V4SF code. */
23883 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23884 gen_lowpart (V4SFmode, vec), elt);
23885 return;
23886 }
23887 break;
23888
23889 case V8HImode:
23890 use_vec_extr = TARGET_SSE2;
23891 break;
23892 case V4HImode:
23893 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23894 break;
23895
23896 case V16QImode:
23897 use_vec_extr = TARGET_SSE4_1;
23898 break;
23899
23900 case V8QImode:
23901 /* ??? Could extract the appropriate HImode element and shift. */
23902 default:
23903 break;
23904 }
23905
23906 if (use_vec_extr)
23907 {
23908 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23909 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23910
23911 /* Let the rtl optimizers know about the zero extension performed. */
23912 if (inner_mode == QImode || inner_mode == HImode)
23913 {
23914 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23915 target = gen_lowpart (SImode, target);
23916 }
23917
23918 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23919 }
23920 else
23921 {
23922 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23923
23924 emit_move_insn (mem, vec);
23925
23926 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23927 emit_move_insn (target, tmp);
23928 }
23929 }
23930
23931 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23932 pattern to reduce; DEST is the destination; IN is the input vector. */
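/* The reduction is done pairwise: movhlps folds the high half of IN onto
   the low half, FN combines them, a shufps then broadcasts element 1 of
   that partial result, and a final FN leaves the fully reduced value in
   element 0 of DEST.  */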
23933
23934 void
23935 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23936 {
23937 rtx tmp1, tmp2, tmp3;
23938
23939 tmp1 = gen_reg_rtx (V4SFmode);
23940 tmp2 = gen_reg_rtx (V4SFmode);
23941 tmp3 = gen_reg_rtx (V4SFmode);
23942
23943 emit_insn (gen_sse_movhlps (tmp1, in, in));
23944 emit_insn (fn (tmp2, tmp1, in));
23945
23946 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23947 GEN_INT (1), GEN_INT (1),
23948 GEN_INT (1+4), GEN_INT (1+4)));
23949 emit_insn (fn (dest, tmp2, tmp3));
23950 }
23951 \f
23952 /* Target hook for scalar_mode_supported_p. */
23953 static bool
23954 ix86_scalar_mode_supported_p (enum machine_mode mode)
23955 {
23956 if (DECIMAL_FLOAT_MODE_P (mode))
23957 return true;
23958 else if (mode == TFmode)
23959 return TARGET_64BIT;
23960 else
23961 return default_scalar_mode_supported_p (mode);
23962 }
23963
23964 /* Implements target hook vector_mode_supported_p. */
23965 static bool
23966 ix86_vector_mode_supported_p (enum machine_mode mode)
23967 {
23968 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23969 return true;
23970 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23971 return true;
23972 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
23973 return true;
23974 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
23975 return true;
23976 return false;
23977 }
23978
23979 /* Target hook for c_mode_for_suffix. */
23980 static enum machine_mode
23981 ix86_c_mode_for_suffix (char suffix)
23982 {
23983 if (TARGET_64BIT && suffix == 'q')
23984 return TFmode;
23985 if (TARGET_MMX && suffix == 'w')
23986 return XFmode;
23987
23988 return VOIDmode;
23989 }
23990
23991 /* Worker function for TARGET_MD_ASM_CLOBBERS.
23992
23993 We do this in the new i386 backend to maintain source compatibility
23994 with the old cc0-based compiler. */
23995
23996 static tree
23997 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
23998 tree inputs ATTRIBUTE_UNUSED,
23999 tree clobbers)
24000 {
24001 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24002 clobbers);
24003 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24004 clobbers);
24005 return clobbers;
24006 }
24007
24008 /* Implements the targetm.encode_section_info target hook. This
24009 is not used by NetWare. */
24010
24011 static void ATTRIBUTE_UNUSED
24012 ix86_encode_section_info (tree decl, rtx rtl, int first)
24013 {
24014 default_encode_section_info (decl, rtl, first);
24015
24016 if (TREE_CODE (decl) == VAR_DECL
24017 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24018 && ix86_in_large_data_p (decl))
24019 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24020 }
24021
24022 /* Worker function for REVERSE_CONDITION. */
24023
24024 enum rtx_code
24025 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24026 {
24027 return (mode != CCFPmode && mode != CCFPUmode
24028 ? reverse_condition (code)
24029 : reverse_condition_maybe_unordered (code));
24030 }
24031
24032 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24033 to OPERANDS[0]. */
24034
24035 const char *
24036 output_387_reg_move (rtx insn, rtx *operands)
24037 {
24038 if (REG_P (operands[0]))
24039 {
24040 if (REG_P (operands[1])
24041 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24042 {
24043 if (REGNO (operands[0]) == FIRST_STACK_REG)
24044 return output_387_ffreep (operands, 0);
24045 return "fstp\t%y0";
24046 }
24047 if (STACK_TOP_P (operands[0]))
24048 return "fld%z1\t%y1";
24049 return "fst\t%y0";
24050 }
24051 else if (MEM_P (operands[0]))
24052 {
24053 gcc_assert (REG_P (operands[1]));
24054 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24055 return "fstp%z0\t%y0";
24056 else
24057 {
24058 /* There is no non-popping store to memory for XFmode.
24059 So if we need one, follow the store with a load. */
24060 if (GET_MODE (operands[0]) == XFmode)
24061 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24062 else
24063 return "fst%z0\t%y0";
24064 }
24065 }
24066 else
24067 gcc_unreachable();
24068 }
24069
24070 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
24071 the FP status register is set. */
24072
24073 void
24074 ix86_emit_fp_unordered_jump (rtx label)
24075 {
24076 rtx reg = gen_reg_rtx (HImode);
24077 rtx temp;
24078
24079 emit_insn (gen_x86_fnstsw_1 (reg));
24080
24081 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24082 {
24083 emit_insn (gen_x86_sahf_1 (reg));
24084
24085 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24086 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24087 }
24088 else
24089 {
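/* Without SAHF, test the C2 condition flag directly: it is bit 10 of
   the FPU status word, i.e. bit 2 of the high byte tested here.  */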
24090 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24091
24092 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24093 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24094 }
24095
24096 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24097 gen_rtx_LABEL_REF (VOIDmode, label),
24098 pc_rtx);
24099 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24100
24101 emit_jump_insn (temp);
24102 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24103 }
24104
24105 /* Output code to perform a log1p XFmode calculation. */
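/* log1p (x) is computed as ln2 * log2 (x + 1) via fyl2xp1 when |x| is
   below 1 - sqrt(2)/2 (~0.2929), the range for which fyl2xp1 is accurate,
   and as ln2 * log2 (1.0 + x) via fyl2x otherwise.  */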
24106
24107 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24108 {
24109 rtx label1 = gen_label_rtx ();
24110 rtx label2 = gen_label_rtx ();
24111
24112 rtx tmp = gen_reg_rtx (XFmode);
24113 rtx tmp2 = gen_reg_rtx (XFmode);
24114
24115 emit_insn (gen_absxf2 (tmp, op1));
24116 emit_insn (gen_cmpxf (tmp,
24117 CONST_DOUBLE_FROM_REAL_VALUE (
24118 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24119 XFmode)));
24120 emit_jump_insn (gen_bge (label1));
24121
24122 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24123 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24124 emit_jump (label2);
24125
24126 emit_label (label1);
24127 emit_move_insn (tmp, CONST1_RTX (XFmode));
24128 emit_insn (gen_addxf3 (tmp, op1, tmp));
24129 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24130 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24131
24132 emit_label (label2);
24133 }
24134
24135 /* Output code to perform a Newton-Raphson approximation of a single precision
24136 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24137
24138 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24139 {
24140 rtx x0, x1, e0, e1, two;
24141
24142 x0 = gen_reg_rtx (mode);
24143 e0 = gen_reg_rtx (mode);
24144 e1 = gen_reg_rtx (mode);
24145 x1 = gen_reg_rtx (mode);
24146
24147 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24148
24149 if (VECTOR_MODE_P (mode))
24150 two = ix86_build_const_vector (SFmode, true, two);
24151
24152 two = force_reg (mode, two);
24153
24154 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
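/* One Newton-Raphson step for f (x) = 1/x - b refines the rcp estimate
   x0 into x1 = x0 * (2.0 - b * x0), roughly doubling the number of
   correct bits in the estimate.  */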
24155
24156 /* x0 = 1./b estimate */
24157 emit_insn (gen_rtx_SET (VOIDmode, x0,
24158 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24159 UNSPEC_RCP)));
24160 /* e0 = x0 * b */
24161 emit_insn (gen_rtx_SET (VOIDmode, e0,
24162 gen_rtx_MULT (mode, x0, b)));
24163 /* e1 = 2. - e0 */
24164 emit_insn (gen_rtx_SET (VOIDmode, e1,
24165 gen_rtx_MINUS (mode, two, e0)));
24166 /* x1 = x0 * e1 */
24167 emit_insn (gen_rtx_SET (VOIDmode, x1,
24168 gen_rtx_MULT (mode, x0, e1)));
24169 /* res = a * x1 */
24170 emit_insn (gen_rtx_SET (VOIDmode, res,
24171 gen_rtx_MULT (mode, a, x1)));
24172 }
24173
24174 /* Output code to perform a Newton-Raphson approximation of a
24175 single precision floating point [reciprocal] square root. */
24176
24177 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24178 bool recip)
24179 {
24180 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
24181
24182 x0 = gen_reg_rtx (mode);
24183 e0 = gen_reg_rtx (mode);
24184 e1 = gen_reg_rtx (mode);
24185 e2 = gen_reg_rtx (mode);
24186 e3 = gen_reg_rtx (mode);
24187
24188 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
24189 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
24190
24191 mask = gen_reg_rtx (mode);
24192
24193 if (VECTOR_MODE_P (mode))
24194 {
24195 three = ix86_build_const_vector (SFmode, true, three);
24196 half = ix86_build_const_vector (SFmode, true, half);
24197 }
24198
24199 three = force_reg (mode, three);
24200 half = force_reg (mode, half);
24201
24202 zero = force_reg (mode, CONST0_RTX(mode));
24203
24204 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24205 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
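/* These follow from one Newton-Raphson step for f (x) = 1/(x*x) - a,
   which refines the rsqrt estimate x0 into 0.5 * x0 * (3.0 - a * x0 * x0);
   multiplying that by a gives the sqrt form.  */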
24206
24207 /* Compare a to zero. */
24208 emit_insn (gen_rtx_SET (VOIDmode, mask,
24209 gen_rtx_NE (mode, a, zero)));
24210
24211 /* x0 = 1./sqrt(a) estimate */
24212 emit_insn (gen_rtx_SET (VOIDmode, x0,
24213 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24214 UNSPEC_RSQRT)));
24215 /* Filter out infinity. */
24216 if (VECTOR_MODE_P (mode))
24217 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
24218 gen_rtx_AND (mode,
24219 gen_lowpart (V4SFmode, x0),
24220 gen_lowpart (V4SFmode, mask))));
24221 else
24222 emit_insn (gen_rtx_SET (VOIDmode, x0,
24223 gen_rtx_AND (mode, x0, mask)));
24224
24225 /* e0 = x0 * a */
24226 emit_insn (gen_rtx_SET (VOIDmode, e0,
24227 gen_rtx_MULT (mode, x0, a)));
24228 /* e1 = e0 * x0 */
24229 emit_insn (gen_rtx_SET (VOIDmode, e1,
24230 gen_rtx_MULT (mode, e0, x0)));
24231 /* e2 = 3. - e1 */
24232 emit_insn (gen_rtx_SET (VOIDmode, e2,
24233 gen_rtx_MINUS (mode, three, e1)));
24234 if (recip)
24235 /* e3 = .5 * x0 */
24236 emit_insn (gen_rtx_SET (VOIDmode, e3,
24237 gen_rtx_MULT (mode, half, x0)));
24238 else
24239 /* e3 = .5 * e0 */
24240 emit_insn (gen_rtx_SET (VOIDmode, e3,
24241 gen_rtx_MULT (mode, half, e0)));
24242 /* ret = e2 * e3 */
24243 emit_insn (gen_rtx_SET (VOIDmode, res,
24244 gen_rtx_MULT (mode, e2, e3)));
24245 }
24246
24247 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24248
24249 static void ATTRIBUTE_UNUSED
24250 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24251 tree decl)
24252 {
24253 /* With Binutils 2.15, the "@unwind" marker must be specified on
24254 every occurrence of the ".eh_frame" section, not just the first
24255 one. */
24256 if (TARGET_64BIT
24257 && strcmp (name, ".eh_frame") == 0)
24258 {
24259 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24260 flags & SECTION_WRITE ? "aw" : "a");
24261 return;
24262 }
24263 default_elf_asm_named_section (name, flags, decl);
24264 }
24265
24266 /* Return the mangling of TYPE if it is an extended fundamental type. */
24267
24268 static const char *
24269 ix86_mangle_type (const_tree type)
24270 {
24271 type = TYPE_MAIN_VARIANT (type);
24272
24273 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24274 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24275 return NULL;
24276
24277 switch (TYPE_MODE (type))
24278 {
24279 case TFmode:
24280 /* __float128 is "g". */
24281 return "g";
24282 case XFmode:
24283 /* "long double" or __float80 is "e". */
24284 return "e";
24285 default:
24286 return NULL;
24287 }
24288 }
24289
24290 /* For 32-bit code we can save PIC register setup by using the hidden
24291 __stack_chk_fail_local function instead of calling __stack_chk_fail
24292 directly. 64-bit code doesn't need to set up any PIC register, so it
24293 is better to call __stack_chk_fail directly. */
24294
24295 static tree
24296 ix86_stack_protect_fail (void)
24297 {
24298 return TARGET_64BIT
24299 ? default_external_stack_protect_fail ()
24300 : default_hidden_stack_protect_fail ();
24301 }
24302
24303 /* Select a format to encode pointers in exception handling data. CODE
24304 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24305 true if the symbol may be affected by dynamic relocations.
24306
24307 ??? All x86 object file formats are capable of representing this.
24308 After all, the relocation needed is the same as for the call insn.
24309 Whether or not a particular assembler allows us to enter such, I
24310 guess we'll have to see. */
24311 int
24312 asm_preferred_eh_data_format (int code, int global)
24313 {
24314 if (flag_pic)
24315 {
24316 int type = DW_EH_PE_sdata8;
24317 if (!TARGET_64BIT
24318 || ix86_cmodel == CM_SMALL_PIC
24319 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24320 type = DW_EH_PE_sdata4;
24321 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24322 }
24323 if (ix86_cmodel == CM_SMALL
24324 || (ix86_cmodel == CM_MEDIUM && code))
24325 return DW_EH_PE_udata4;
24326 return DW_EH_PE_absptr;
24327 }
24328 \f
24329 /* Expand a copysign operation: copy the sign of SIGN onto the nonnegative
24330 value ABS_VALUE, storing the result in RESULT. If MASK is non-null, it is
24331 the mask that was used to clear the sign bit (all bits set except the sign bit). */
24332 static void
24333 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24334 {
24335 enum machine_mode mode = GET_MODE (sign);
24336 rtx sgn = gen_reg_rtx (mode);
24337 if (mask == NULL_RTX)
24338 {
24339 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24340 if (!VECTOR_MODE_P (mode))
24341 {
24342 /* We need to generate a scalar mode mask in this case. */
24343 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24344 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24345 mask = gen_reg_rtx (mode);
24346 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24347 }
24348 }
24349 else
24350 mask = gen_rtx_NOT (mode, mask);
24351 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24352 gen_rtx_AND (mode, mask, sign)));
24353 emit_insn (gen_rtx_SET (VOIDmode, result,
24354 gen_rtx_IOR (mode, abs_value, sgn)));
24355 }
24356
24357 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24358 mask for masking out the sign-bit is stored in *SMASK, if that is
24359 non-null. */
24360 static rtx
24361 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24362 {
24363 enum machine_mode mode = GET_MODE (op0);
24364 rtx xa, mask;
24365
24366 xa = gen_reg_rtx (mode);
24367 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24368 if (!VECTOR_MODE_P (mode))
24369 {
24370 /* We need to generate a scalar mode mask in this case. */
24371 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24372 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24373 mask = gen_reg_rtx (mode);
24374 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24375 }
24376 emit_insn (gen_rtx_SET (VOIDmode, xa,
24377 gen_rtx_AND (mode, op0, mask)));
24378
24379 if (smask)
24380 *smask = mask;
24381
24382 return xa;
24383 }
24384
24385 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24386 swapping the operands if SWAP_OPERANDS is true. The expanded
24387 code is a forward jump to a newly created label in case the
24388 comparison is true. The generated label rtx is returned. */
24389 static rtx
24390 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24391 bool swap_operands)
24392 {
24393 rtx label, tmp;
24394
24395 if (swap_operands)
24396 {
24397 tmp = op0;
24398 op0 = op1;
24399 op1 = tmp;
24400 }
24401
24402 label = gen_label_rtx ();
24403 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24404 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24405 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24406 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24407 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24408 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24409 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24410 JUMP_LABEL (tmp) = label;
24411
24412 return label;
24413 }
24414
24415 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24416 using comparison code CODE. Operands are swapped for the comparison if
24417 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24418 static rtx
24419 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24420 bool swap_operands)
24421 {
24422 enum machine_mode mode = GET_MODE (op0);
24423 rtx mask = gen_reg_rtx (mode);
24424
24425 if (swap_operands)
24426 {
24427 rtx tmp = op0;
24428 op0 = op1;
24429 op1 = tmp;
24430 }
24431
24432 if (mode == DFmode)
24433 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24434 gen_rtx_fmt_ee (code, mode, op0, op1)));
24435 else
24436 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24437 gen_rtx_fmt_ee (code, mode, op0, op1)));
24438
24439 return mask;
24440 }
24441
24442 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24443 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
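/* Adding this constant to a nonnegative value below 2**52 (2**23 for
   SFmode) and subtracting it again rounds the value to an integer in the
   current rounding mode, because at that magnitude the spacing between
   adjacent representable values is exactly 1.0.  */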
24444 static rtx
24445 ix86_gen_TWO52 (enum machine_mode mode)
24446 {
24447 REAL_VALUE_TYPE TWO52r;
24448 rtx TWO52;
24449
24450 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24451 TWO52 = const_double_from_real_value (TWO52r, mode);
24452 TWO52 = force_reg (mode, TWO52);
24453
24454 return TWO52;
24455 }
24456
24457 /* Expand SSE sequence for computing lround from OP1 storing
24458 into OP0. */
24459 void
24460 ix86_expand_lround (rtx op0, rtx op1)
24461 {
24462 /* C code for the stuff we're doing below:
24463 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
24464 return (long)tmp;
24465 */
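/* Using nextafter (0.5, 0.0) rather than 0.5 itself keeps the addition
   from rounding a value just below a halfway point (such as the double
   immediately below 0.5) up across the integer boundary.  */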
24466 enum machine_mode mode = GET_MODE (op1);
24467 const struct real_format *fmt;
24468 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24469 rtx adj;
24470
24471 /* load nextafter (0.5, 0.0) */
24472 fmt = REAL_MODE_FORMAT (mode);
24473 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24474 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24475
24476 /* adj = copysign (0.5, op1) */
24477 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24478 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24479
24480 /* adj = op1 + adj */
24481 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24482
24483 /* op0 = (imode)adj */
24484 expand_fix (op0, adj, 0);
24485 }
24486
24487 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
24488 storing into OPERAND0. */
24489 void
24490 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24491 {
24492 /* C code for the stuff we're doing below (for do_floor):
24493 xi = (long)op1;
24494 xi -= (double)xi > op1 ? 1 : 0;
24495 return xi;
24496 */
24497 enum machine_mode fmode = GET_MODE (op1);
24498 enum machine_mode imode = GET_MODE (op0);
24499 rtx ireg, freg, label, tmp;
24500
24501 /* reg = (long)op1 */
24502 ireg = gen_reg_rtx (imode);
24503 expand_fix (ireg, op1, 0);
24504
24505 /* freg = (double)reg */
24506 freg = gen_reg_rtx (fmode);
24507 expand_float (freg, ireg, 0);
24508
24509 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24510 label = ix86_expand_sse_compare_and_jump (UNLE,
24511 freg, op1, !do_floor);
24512 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24513 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24514 emit_move_insn (ireg, tmp);
24515
24516 emit_label (label);
24517 LABEL_NUSES (label) = 1;
24518
24519 emit_move_insn (op0, ireg);
24520 }
24521
24522 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24523 result in OPERAND0. */
24524 void
24525 ix86_expand_rint (rtx operand0, rtx operand1)
24526 {
24527 /* C code for the stuff we're doing below:
24528 xa = fabs (operand1);
24529 if (!isless (xa, 2**52))
24530 return operand1;
24531 xa = xa + 2**52 - 2**52;
24532 return copysign (xa, operand1);
24533 */
24534 enum machine_mode mode = GET_MODE (operand0);
24535 rtx res, xa, label, TWO52, mask;
24536
24537 res = gen_reg_rtx (mode);
24538 emit_move_insn (res, operand1);
24539
24540 /* xa = abs (operand1) */
24541 xa = ix86_expand_sse_fabs (res, &mask);
24542
24543 /* if (!isless (xa, TWO52)) goto label; */
24544 TWO52 = ix86_gen_TWO52 (mode);
24545 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24546
24547 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24548 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24549
24550 ix86_sse_copysign_to_positive (res, xa, res, mask);
24551
24552 emit_label (label);
24553 LABEL_NUSES (label) = 1;
24554
24555 emit_move_insn (operand0, res);
24556 }
24557
24558 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24559 into OPERAND0. */
24560 void
24561 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24562 {
24563 /* C code for the stuff we expand below.
24564 double xa = fabs (x), x2;
24565 if (!isless (xa, TWO52))
24566 return x;
24567 xa = xa + TWO52 - TWO52;
24568 x2 = copysign (xa, x);
24569 Compensate. Floor:
24570 if (x2 > x)
24571 x2 -= 1;
24572 Compensate. Ceil:
24573 if (x2 < x)
24574 x2 -= -1;
24575 return x2;
24576 */
24577 enum machine_mode mode = GET_MODE (operand0);
24578 rtx xa, TWO52, tmp, label, one, res, mask;
24579
24580 TWO52 = ix86_gen_TWO52 (mode);
24581
24582 /* Temporary for holding the result, initialized to the input
24583 operand to ease control flow. */
24584 res = gen_reg_rtx (mode);
24585 emit_move_insn (res, operand1);
24586
24587 /* xa = abs (operand1) */
24588 xa = ix86_expand_sse_fabs (res, &mask);
24589
24590 /* if (!isless (xa, TWO52)) goto label; */
24591 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24592
24593 /* xa = xa + TWO52 - TWO52; */
24594 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24595 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24596
24597 /* xa = copysign (xa, operand1) */
24598 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24599
24600 /* generate 1.0 or -1.0 */
24601 one = force_reg (mode,
24602 const_double_from_real_value (do_floor
24603 ? dconst1 : dconstm1, mode));
24604
24605 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24606 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24607 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24608 gen_rtx_AND (mode, one, tmp)));
24609 /* We always need to subtract here to preserve signed zero. */
24610 tmp = expand_simple_binop (mode, MINUS,
24611 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24612 emit_move_insn (res, tmp);
24613
24614 emit_label (label);
24615 LABEL_NUSES (label) = 1;
24616
24617 emit_move_insn (operand0, res);
24618 }
24619
24620 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24621 into OPERAND0. */
24622 void
24623 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24624 {
24625 /* C code for the stuff we expand below.
24626 double xa = fabs (x), x2;
24627 if (!isless (xa, TWO52))
24628 return x;
24629 x2 = (double)(long)x;
24630 Compensate. Floor:
24631 if (x2 > x)
24632 x2 -= 1;
24633 Compensate. Ceil:
24634 if (x2 < x)
24635 x2 += 1;
24636 if (HONOR_SIGNED_ZEROS (mode))
24637 return copysign (x2, x);
24638 return x2;
24639 */
24640 enum machine_mode mode = GET_MODE (operand0);
24641 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24642
24643 TWO52 = ix86_gen_TWO52 (mode);
24644
24645 /* Temporary for holding the result, initialized to the input
24646 operand to ease control flow. */
24647 res = gen_reg_rtx (mode);
24648 emit_move_insn (res, operand1);
24649
24650 /* xa = abs (operand1) */
24651 xa = ix86_expand_sse_fabs (res, &mask);
24652
24653 /* if (!isless (xa, TWO52)) goto label; */
24654 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24655
24656 /* xa = (double)(long)x */
24657 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24658 expand_fix (xi, res, 0);
24659 expand_float (xa, xi, 0);
24660
24661 /* generate 1.0 */
24662 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24663
24664 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24665 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24666 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24667 gen_rtx_AND (mode, one, tmp)));
24668 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24669 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24670 emit_move_insn (res, tmp);
24671
24672 if (HONOR_SIGNED_ZEROS (mode))
24673 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24674
24675 emit_label (label);
24676 LABEL_NUSES (label) = 1;
24677
24678 emit_move_insn (operand0, res);
24679 }
24680
24681 /* Expand SSE sequence for computing round from OPERAND1 storing
24682 into OPERAND0. Sequence that works without relying on DImode truncation
24683 via cvttsd2siq that is only available on 64bit targets. */
24684 void
24685 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24686 {
24687 /* C code for the stuff we expand below.
24688 double xa = fabs (x), xa2, x2;
24689 if (!isless (xa, TWO52))
24690 return x;
24691 Using the absolute value and copying back sign makes
24692 -0.0 -> -0.0 correct.
24693 xa2 = xa + TWO52 - TWO52;
24694 Compensate.
24695 dxa = xa2 - xa;
24696 if (dxa <= -0.5)
24697 xa2 += 1;
24698 else if (dxa > 0.5)
24699 xa2 -= 1;
24700 x2 = copysign (xa2, x);
24701 return x2;
24702 */
24703 enum machine_mode mode = GET_MODE (operand0);
24704 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24705
24706 TWO52 = ix86_gen_TWO52 (mode);
24707
24708 /* Temporary for holding the result, initialized to the input
24709 operand to ease control flow. */
24710 res = gen_reg_rtx (mode);
24711 emit_move_insn (res, operand1);
24712
24713 /* xa = abs (operand1) */
24714 xa = ix86_expand_sse_fabs (res, &mask);
24715
24716 /* if (!isless (xa, TWO52)) goto label; */
24717 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24718
24719 /* xa2 = xa + TWO52 - TWO52; */
24720 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24721 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24722
24723 /* dxa = xa2 - xa; */
24724 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24725
24726 /* generate 0.5, 1.0 and -0.5 */
24727 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24728 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24729 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24730 0, OPTAB_DIRECT);
24731
24732 /* Compensate. */
24733 tmp = gen_reg_rtx (mode);
24734 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24735 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24736 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24737 gen_rtx_AND (mode, one, tmp)));
24738 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24739 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24740 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24741 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24742 gen_rtx_AND (mode, one, tmp)));
24743 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24744
24745 /* res = copysign (xa2, operand1) */
24746 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24747
24748 emit_label (label);
24749 LABEL_NUSES (label) = 1;
24750
24751 emit_move_insn (operand0, res);
24752 }
24753
24754 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24755 into OPERAND0. */
24756 void
24757 ix86_expand_trunc (rtx operand0, rtx operand1)
24758 {
24759 /* C code for SSE variant we expand below.
24760 double xa = fabs (x), x2;
24761 if (!isless (xa, TWO52))
24762 return x;
24763 x2 = (double)(long)x;
24764 if (HONOR_SIGNED_ZEROS (mode))
24765 return copysign (x2, x);
24766 return x2;
24767 */
24768 enum machine_mode mode = GET_MODE (operand0);
24769 rtx xa, xi, TWO52, label, res, mask;
24770
24771 TWO52 = ix86_gen_TWO52 (mode);
24772
24773 /* Temporary for holding the result, initialized to the input
24774 operand to ease control flow. */
24775 res = gen_reg_rtx (mode);
24776 emit_move_insn (res, operand1);
24777
24778 /* xa = abs (operand1) */
24779 xa = ix86_expand_sse_fabs (res, &mask);
24780
24781 /* if (!isless (xa, TWO52)) goto label; */
24782 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24783
24784 /* x = (double)(long)x */
24785 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24786 expand_fix (xi, res, 0);
24787 expand_float (res, xi, 0);
24788
24789 if (HONOR_SIGNED_ZEROS (mode))
24790 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24791
24792 emit_label (label);
24793 LABEL_NUSES (label) = 1;
24794
24795 emit_move_insn (operand0, res);
24796 }
24797
24798 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
24799 OPERAND0, without relying on DImode truncation via cvttsd2siq (64bit only). */
24800 void
24801 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24802 {
24803 enum machine_mode mode = GET_MODE (operand0);
24804 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24805
24806 /* C code for SSE variant we expand below.
24807 double xa = fabs (x), xa2, x2;
24808 if (!isless (xa, TWO52))
24809 return x;
24810 xa2 = xa + TWO52 - TWO52;
24811 Compensate:
24812 if (xa2 > xa)
24813 xa2 -= 1.0;
24814 x2 = copysign (xa2, x);
24815 return x2;
24816 */
24817
24818 TWO52 = ix86_gen_TWO52 (mode);
24819
24820 /* Temporary for holding the result, initialized to the input
24821 operand to ease control flow. */
24822 res = gen_reg_rtx (mode);
24823 emit_move_insn (res, operand1);
24824
24825 /* xa = abs (operand1) */
24826 xa = ix86_expand_sse_fabs (res, &smask);
24827
24828 /* if (!isless (xa, TWO52)) goto label; */
24829 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24830
24831 /* res = xa + TWO52 - TWO52; */
24832 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24833 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24834 emit_move_insn (res, tmp);
24835
24836 /* generate 1.0 */
24837 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24838
24839 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24840 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24841 emit_insn (gen_rtx_SET (VOIDmode, mask,
24842 gen_rtx_AND (mode, mask, one)));
24843 tmp = expand_simple_binop (mode, MINUS,
24844 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24845 emit_move_insn (res, tmp);
24846
24847 /* res = copysign (res, operand1) */
24848 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24849
24850 emit_label (label);
24851 LABEL_NUSES (label) = 1;
24852
24853 emit_move_insn (operand0, res);
24854 }
24855
24856 /* Expand SSE sequence for computing round from OPERAND1 storing
24857 into OPERAND0. */
24858 void
24859 ix86_expand_round (rtx operand0, rtx operand1)
24860 {
24861 /* C code for the stuff we're doing below:
24862 double xa = fabs (x);
24863 if (!isless (xa, TWO52))
24864 return x;
24865 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24866 return copysign (xa, x);
24867 */
24868 enum machine_mode mode = GET_MODE (operand0);
24869 rtx res, TWO52, xa, label, xi, half, mask;
24870 const struct real_format *fmt;
24871 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24872
24873 /* Temporary for holding the result, initialized to the input
24874 operand to ease control flow. */
24875 res = gen_reg_rtx (mode);
24876 emit_move_insn (res, operand1);
24877
24878 TWO52 = ix86_gen_TWO52 (mode);
24879 xa = ix86_expand_sse_fabs (res, &mask);
24880 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24881
24882 /* load nextafter (0.5, 0.0) */
24883 fmt = REAL_MODE_FORMAT (mode);
24884 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24885 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24886
24887 /* xa = xa + 0.5 */
24888 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24889 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24890
24891 /* xa = (double)(int64_t)xa */
24892 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24893 expand_fix (xi, xa, 0);
24894 expand_float (xa, xi, 0);
24895
24896 /* res = copysign (xa, operand1) */
24897 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24898
24899 emit_label (label);
24900 LABEL_NUSES (label) = 1;
24901
24902 emit_move_insn (operand0, res);
24903 }
24904
24905 \f
24906 /* Check whether an SSE5 instruction is valid.
24907 OPERANDS is the array of operands.
24908 NUM is the number of operands.
24909 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24910 NUM_MEMORY is the maximum number of memory operands to accept. */
24911 bool
24912 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24913 {
24914 int mem_mask;
24915 int mem_count;
24916 int i;
24917
24918 /* Count the number of memory arguments */
24919 mem_mask = 0;
24920 mem_count = 0;
24921 for (i = 0; i < num; i++)
24922 {
24923 enum machine_mode mode = GET_MODE (operands[i]);
24924 if (register_operand (operands[i], mode))
24925 ;
24926
24927 else if (memory_operand (operands[i], mode))
24928 {
24929 mem_mask |= (1 << i);
24930 mem_count++;
24931 }
24932
24933 else
24934 {
24935 rtx pattern = PATTERN (insn);
24936
24937 /* allow 0 for pcmov */
24938 if (GET_CODE (pattern) != SET
24939 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24940 || i < 2
24941 || operands[i] != CONST0_RTX (mode))
24942 return false;
24943 }
24944 }
24945
24946 /* If there were no memory operations, allow the insn */
24947 if (mem_mask == 0)
24948 return true;
24949
24950 /* Do not allow the destination register to be a memory operand. */
24951 else if (mem_mask & (1 << 0))
24952 return false;
24953
24954 /* If there are too many memory operands, disallow the instruction. Although
24955 the hardware allows only one memory reference, before register allocation
24956 we sometimes accept two memory operands for certain insns so that code
24957 like the following can be optimized:
24958
24959 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24960
24961 or similar cases that are vectorized to use the fmaddss
24962 instruction. */
24963 else if (mem_count > num_memory)
24964 return false;
24965
24966 /* Don't allow more than one memory operation if not optimizing. */
24967 else if (mem_count > 1 && !optimize)
24968 return false;
24969
24970 else if (num == 4 && mem_count == 1)
24971 {
24972 /* formats (destination is the first argument), example fmaddss:
24973 xmm1, xmm1, xmm2, xmm3/mem
24974 xmm1, xmm1, xmm2/mem, xmm3
24975 xmm1, xmm2, xmm3/mem, xmm1
24976 xmm1, xmm2/mem, xmm3, xmm1 */
24977 if (uses_oc0)
24978 return ((mem_mask == (1 << 1))
24979 || (mem_mask == (1 << 2))
24980 || (mem_mask == (1 << 3)));
24981
24982 /* format, example pmacsdd:
24983 xmm1, xmm2, xmm3/mem, xmm1 */
24984 else
24985 return (mem_mask == (1 << 2));
24986 }
24987
24988 else if (num == 4 && num_memory == 2)
24989 {
24990 /* If there are two memory operations, we can load one of the memory ops
24991 into the destination register. This is for optimizing the
24992 multiply/add ops, which the combiner has optimized both the multiply
24993 and the add insns to have a memory operation. We have to be careful
24994 that the destination doesn't overlap with the inputs. */
24995 rtx op0 = operands[0];
24996
24997 if (reg_mentioned_p (op0, operands[1])
24998 || reg_mentioned_p (op0, operands[2])
24999 || reg_mentioned_p (op0, operands[3]))
25000 return false;
25001
25002 /* formats (destination is the first argument), example fmaddss:
25003 xmm1, xmm1, xmm2, xmm3/mem
25004 xmm1, xmm1, xmm2/mem, xmm3
25005 xmm1, xmm2, xmm3/mem, xmm1
25006 xmm1, xmm2/mem, xmm3, xmm1
25007
25008 For the oc0 case, we will load either operands[1] or operands[3] into
25009 operands[0], so any combination of 2 memory operands is ok. */
25010 if (uses_oc0)
25011 return true;
25012
25013 /* format, example pmacsdd:
25014 xmm1, xmm2, xmm3/mem, xmm1
25015
25016 For the integer multiply/add instructions be more restrictive and
25017 require operands[2] and operands[3] to be the memory operands. */
25018 else
25019 return (mem_mask == ((1 << 2) | (1 << 3)));
25020 }
25021
25022 else if (num == 3 && num_memory == 1)
25023 {
25024 /* formats, example protb:
25025 xmm1, xmm2, xmm3/mem
25026 xmm1, xmm2/mem, xmm3 */
25027 if (uses_oc0)
25028 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25029
25030 /* format, example comeq:
25031 xmm1, xmm2, xmm3/mem */
25032 else
25033 return (mem_mask == (1 << 2));
25034 }
25035
25036 else
25037 gcc_unreachable ();
25038
25039 return false;
25040 }
25041
25042 \f
25043 /* Fix up an SSE5 instruction that has 2 memory input references into a form
25044 the hardware will allow, by using the destination register to load one of
25045 the memory operands. Presently this is used by the multiply/add routines
25046 to allow 2 memory references. */
25047
25048 void
25049 ix86_expand_sse5_multiple_memory (rtx operands[],
25050 int num,
25051 enum machine_mode mode)
25052 {
25053 rtx op0 = operands[0];
25054 if (num != 4
25055 || memory_operand (op0, mode)
25056 || reg_mentioned_p (op0, operands[1])
25057 || reg_mentioned_p (op0, operands[2])
25058 || reg_mentioned_p (op0, operands[3]))
25059 gcc_unreachable ();
25060
25061 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25062 the destination register. */
25063 if (memory_operand (operands[1], mode))
25064 {
25065 emit_move_insn (op0, operands[1]);
25066 operands[1] = op0;
25067 }
25068 else if (memory_operand (operands[3], mode))
25069 {
25070 emit_move_insn (op0, operands[3]);
25071 operands[3] = op0;
25072 }
25073 else
25074 gcc_unreachable ();
25075
25076 return;
25077 }
25078
25079 \f
25080 /* Table of valid machine attributes. */
25081 static const struct attribute_spec ix86_attribute_table[] =
25082 {
25083 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25084 /* Stdcall attribute says callee is responsible for popping arguments
25085 if they are not variable. */
25086 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25087 /* Fastcall attribute says callee is responsible for popping arguments
25088 if they are not variable. */
25089 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25090 /* Cdecl attribute says the callee is a normal C declaration */
25091 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25092 /* Regparm attribute specifies how many integer arguments are to be
25093 passed in registers. */
25094 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25095 /* Sseregparm attribute says we are using x86_64 calling conventions
25096 for FP arguments. */
25097 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25098 /* force_align_arg_pointer says this function realigns the stack at entry. */
25099 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25100 false, true, true, ix86_handle_cconv_attribute },
25101 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25102 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25103 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25104 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25105 #endif
25106 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25107 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25108 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25109 SUBTARGET_ATTRIBUTE_TABLE,
25110 #endif
25111 { NULL, 0, 0, false, false, false, NULL }
25112 };
25113
25114 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25115 static int
25116 x86_builtin_vectorization_cost (bool runtime_test)
25117 {
25118 /* If the branch of the runtime test is taken - i.e. the vectorized
25119 version is skipped - this incurs a misprediction cost (because the
25120 vectorized version is expected to be the fall-through). So we subtract
25121 the latency of a mispredicted branch from the costs that are incurred
25122 when the vectorized version is executed.
25123
25124 TODO: The values in individual target tables have to be tuned or new
25125 fields may be needed. For example, on K8 the default branch path is the
25126 not-taken path. If the taken path is predicted correctly, the minimum
25127 penalty of going down the taken path is 1 cycle. If the taken path is
25128 not predicted correctly, then the minimum penalty is 10 cycles. */
25129
25130 if (runtime_test)
25131 {
25132 return (-(ix86_cost->cond_taken_branch_cost));
25133 }
25134 else
25135 return 0;
25136 }

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

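/* The #undef/#define pairs above override individual entries of the target
   hook vector; TARGET_INITIALIZER (defined in target-def.h) expands to an
   aggregate initializer that picks up every TARGET_* macro, so hooks not
   redefined here keep their defaults.  The pattern, shown with a made-up
   hook name for illustration:

     #undef  TARGET_SOME_HOOK
     #define TARGET_SOME_HOOK ix86_some_hook

   followed by the single initialization below.  */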
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"