revert: i386.c (ix86_rtx_costs): For standard 80387 constants...
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
  59 /* Return the index of the given mode in the mult and division cost tables.  */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
66
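/* Illustrative sketch: MODE_INDEX picks the per-mode slot of the five-entry
   multiply and divide tables in struct processor_costs below.  The field
   name mult_init is assumed from i386.h; a cost query for a multiply in a
   given mode would look roughly like this non-compiled example.  */
#if 0
static int
example_mult_cost (enum machine_mode mode)
{
  /* SImode maps to index 2, so this reads the third entry of the
     per-processor multiply table (the initializers below are already
     scaled by COSTS_N_INSNS or COSTS_N_BYTES).  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif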
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
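/* With COSTS_N_INSNS (N) assumed to be (N) * 4, COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1): when tuning for size, a two-byte add scores the same
   as one generic instruction does when tuning for speed.  */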
70
71 static const
72 struct processor_costs size_cost = { /* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
90 0, /* "large" insn */
91 2, /* MOVE_RATIO */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
115 2, /* Branch cost */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
122 };
123
124 /* Processor costs (relative to an add) */
125 static const
126 struct processor_costs i386_cost = { /* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
145 3, /* MOVE_RATIO */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
169 1, /* Branch cost */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
176 };
177
178 static const
179 struct processor_costs i486_cost = { /* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
198 3, /* MOVE_RATIO */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
222 1, /* Branch cost */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
229 };
230
231 static const
232 struct processor_costs pentium_cost = {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
251 6, /* MOVE_RATIO */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
275 2, /* Branch cost */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
282 };
283
284 static const
285 struct processor_costs pentiumpro_cost = {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
304 6, /* MOVE_RATIO */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
328 2, /* Branch cost */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
335 };
336
337 static const
338 struct processor_costs geode_cost = {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
357 4, /* MOVE_RATIO */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
368
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
382 1, /* Branch cost */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
389 };
390
391 static const
392 struct processor_costs k6_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
411 4, /* MOVE_RATIO */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
435 1, /* Branch cost */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
442 };
443
444 static const
445 struct processor_costs athlon_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
464 9, /* MOVE_RATIO */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
488 5, /* Branch cost */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
495 };
496
497 static const
498 struct processor_costs k8_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
517 9, /* MOVE_RATIO */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
  540   /* New AMD processors never drop prefetches; if they cannot be performed
  541      immediately, they are queued.  We set the number of simultaneous prefetches
  542      to a large constant to reflect this (it is probably not a good idea to
  543      leave the number of prefetches unlimited, as their execution also takes
  544      some time).  */
545 100, /* number of parallel prefetches */
546 5, /* Branch cost */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 };
554
555 static const
556 struct processor_costs pentium4_cost = {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
575 6, /* MOVE_RATIO */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
599 2, /* Branch cost */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
606 };
607
608 static const
609 struct processor_costs nocona_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
628 17, /* MOVE_RATIO */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
652 1, /* Branch cost */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
659 };
660
661 static const
662 struct processor_costs core2_cost = {
663 COSTS_N_INSNS (1), /* cost of an add instruction */
664 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
665 COSTS_N_INSNS (1), /* variable shift costs */
666 COSTS_N_INSNS (1), /* constant shift costs */
667 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
668 COSTS_N_INSNS (3), /* HI */
669 COSTS_N_INSNS (3), /* SI */
670 COSTS_N_INSNS (3), /* DI */
671 COSTS_N_INSNS (3)}, /* other */
672 0, /* cost of multiply per each bit set */
673 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
674 COSTS_N_INSNS (22), /* HI */
675 COSTS_N_INSNS (22), /* SI */
676 COSTS_N_INSNS (22), /* DI */
677 COSTS_N_INSNS (22)}, /* other */
678 COSTS_N_INSNS (1), /* cost of movsx */
679 COSTS_N_INSNS (1), /* cost of movzx */
680 8, /* "large" insn */
681 16, /* MOVE_RATIO */
682 2, /* cost for loading QImode using movzbl */
683 {6, 6, 6}, /* cost of loading integer registers
684 in QImode, HImode and SImode.
685 Relative to reg-reg move (2). */
686 {4, 4, 4}, /* cost of storing integer registers */
687 2, /* cost of reg,reg fld/fst */
688 {6, 6, 6}, /* cost of loading fp registers
689 in SFmode, DFmode and XFmode */
  690     {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {6, 6}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {4, 4}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {6, 6, 6}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {4, 4, 4}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 2, /* MMX or SSE register to integer */
702 128, /* size of prefetch block */
703 8, /* number of parallel prefetches */
704 3, /* Branch cost */
705 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (1), /* cost of FABS instruction. */
709 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
711 };
712
713 /* Generic64 should produce code tuned for Nocona and K8. */
714 static const
715 struct processor_costs generic64_cost = {
716 COSTS_N_INSNS (1), /* cost of an add instruction */
  717   /* On all chips taken into consideration, lea is 2 cycles or more.  With
  718      this cost, however, our current implementation of synth_mult results in
  719      the use of unnecessary temporary registers, causing regressions on several
  720      SPECfp benchmarks.  */
721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
722 COSTS_N_INSNS (1), /* variable shift costs */
723 COSTS_N_INSNS (1), /* constant shift costs */
724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
725 COSTS_N_INSNS (4), /* HI */
726 COSTS_N_INSNS (3), /* SI */
727 COSTS_N_INSNS (4), /* DI */
728 COSTS_N_INSNS (2)}, /* other */
729 0, /* cost of multiply per each bit set */
730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
731 COSTS_N_INSNS (26), /* HI */
732 COSTS_N_INSNS (42), /* SI */
733 COSTS_N_INSNS (74), /* DI */
734 COSTS_N_INSNS (74)}, /* other */
735 COSTS_N_INSNS (1), /* cost of movsx */
736 COSTS_N_INSNS (1), /* cost of movzx */
737 8, /* "large" insn */
738 17, /* MOVE_RATIO */
739 4, /* cost for loading QImode using movzbl */
740 {4, 4, 4}, /* cost of loading integer registers
741 in QImode, HImode and SImode.
742 Relative to reg-reg move (2). */
743 {4, 4, 4}, /* cost of storing integer registers */
744 4, /* cost of reg,reg fld/fst */
745 {12, 12, 12}, /* cost of loading fp registers
746 in SFmode, DFmode and XFmode */
747 {6, 6, 8}, /* cost of storing fp registers
748 in SFmode, DFmode and XFmode */
749 2, /* cost of moving MMX register */
750 {8, 8}, /* cost of loading MMX registers
751 in SImode and DImode */
752 {8, 8}, /* cost of storing MMX registers
753 in SImode and DImode */
754 2, /* cost of moving SSE register */
755 {8, 8, 8}, /* cost of loading SSE registers
756 in SImode, DImode and TImode */
757 {8, 8, 8}, /* cost of storing SSE registers
758 in SImode, DImode and TImode */
759 5, /* MMX or SSE register to integer */
760 64, /* size of prefetch block */
761 6, /* number of parallel prefetches */
  762   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
  763      value is increased to the perhaps more appropriate value of 5.  */
764 3, /* Branch cost */
765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
771 };
772
773 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
774 static const
775 struct processor_costs generic32_cost = {
776 COSTS_N_INSNS (1), /* cost of an add instruction */
777 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
778 COSTS_N_INSNS (1), /* variable shift costs */
779 COSTS_N_INSNS (1), /* constant shift costs */
780 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
781 COSTS_N_INSNS (4), /* HI */
782 COSTS_N_INSNS (3), /* SI */
783 COSTS_N_INSNS (4), /* DI */
784 COSTS_N_INSNS (2)}, /* other */
785 0, /* cost of multiply per each bit set */
786 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
787 COSTS_N_INSNS (26), /* HI */
788 COSTS_N_INSNS (42), /* SI */
789 COSTS_N_INSNS (74), /* DI */
790 COSTS_N_INSNS (74)}, /* other */
791 COSTS_N_INSNS (1), /* cost of movsx */
792 COSTS_N_INSNS (1), /* cost of movzx */
793 8, /* "large" insn */
794 17, /* MOVE_RATIO */
795 4, /* cost for loading QImode using movzbl */
796 {4, 4, 4}, /* cost of loading integer registers
797 in QImode, HImode and SImode.
798 Relative to reg-reg move (2). */
799 {4, 4, 4}, /* cost of storing integer registers */
800 4, /* cost of reg,reg fld/fst */
801 {12, 12, 12}, /* cost of loading fp registers
802 in SFmode, DFmode and XFmode */
803 {6, 6, 8}, /* cost of storing fp registers
804 in SFmode, DFmode and XFmode */
805 2, /* cost of moving MMX register */
806 {8, 8}, /* cost of loading MMX registers
807 in SImode and DImode */
808 {8, 8}, /* cost of storing MMX registers
809 in SImode and DImode */
810 2, /* cost of moving SSE register */
811 {8, 8, 8}, /* cost of loading SSE registers
812 in SImode, DImode and TImode */
813 {8, 8, 8}, /* cost of storing SSE registers
814 in SImode, DImode and TImode */
815 5, /* MMX or SSE register to integer */
816 64, /* size of prefetch block */
817 6, /* number of parallel prefetches */
818 3, /* Branch cost */
819 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
820 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
821 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
822 COSTS_N_INSNS (8), /* cost of FABS instruction. */
823 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
824 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
825 };
826
827 const struct processor_costs *ix86_cost = &pentium_cost;
828
829 /* Processor feature/optimization bitmasks. */
830 #define m_386 (1<<PROCESSOR_I386)
831 #define m_486 (1<<PROCESSOR_I486)
832 #define m_PENT (1<<PROCESSOR_PENTIUM)
833 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
834 #define m_GEODE (1<<PROCESSOR_GEODE)
835 #define m_K6_GEODE (m_K6 | m_GEODE)
836 #define m_K6 (1<<PROCESSOR_K6)
837 #define m_ATHLON (1<<PROCESSOR_ATHLON)
838 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
839 #define m_K8 (1<<PROCESSOR_K8)
840 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
841 #define m_NOCONA (1<<PROCESSOR_NOCONA)
842 #define m_CORE2 (1<<PROCESSOR_CORE2)
843 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
844 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
845 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
846
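/* Illustrative sketch: these masks are assumed to be tested against the
   active tuning via a one-hot mask of ix86_tune, roughly the way i386.h
   builds its TARGET_* tuning macros.  The macros below are non-compiled
   examples, not the real definitions.  */
#if 0
#define EXAMPLE_TUNEMASK (1 << ix86_tune)
#define EXAMPLE_TARGET_USE_LEAVE (x86_use_leave & EXAMPLE_TUNEMASK)
#endif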
  847 /* Generic instruction choice should be a common subset of the supported CPUs
  848    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
849
  850 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
  851    Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
  852    generic because it does not work well with PPro based chips.  */
853 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
854 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
855 const int x86_zero_extend_with_and = m_486 | m_PENT;
856 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
857 const int x86_double_with_add = ~m_386;
858 const int x86_use_bit_test = m_386;
859 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
860 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
861 const int x86_3dnow_a = m_ATHLON_K8;
862 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
  863 /* Branch hints were put in P4 based on simulation results, but
  864    after P4 was made no performance benefit was observed with
  865    branch hints; they also increase code size.  As a result,
  866    icc never generates branch hints.  */
867 const int x86_branch_hints = 0;
868 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
  869 /* We probably ought to watch for partial register stalls on the Generic32
  870    compilation setting as well.  However, in the current implementation
  871    partial register stalls are not eliminated very well - they can
  872    be introduced via subregs synthesized by combine and can happen
  873    in caller/callee saving sequences.
  874    Because this option pays back little on PPro based chips and conflicts
  875    with the partial register dependencies used by Athlon/P4 based chips, it is
  876    better to leave it off for generic32 for now.  */
877 const int x86_partial_reg_stall = m_PPRO;
878 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
879 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
880 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
881 const int x86_use_mov0 = m_K6;
882 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
883 const int x86_read_modify_write = ~m_PENT;
884 const int x86_read_modify = ~(m_PENT | m_PPRO);
885 const int x86_split_long_moves = m_PPRO;
886 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
887 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
888 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
889 const int x86_qimode_math = ~(0);
890 const int x86_promote_qi_regs = 0;
  891 /* On PPro this flag is meant to avoid partial register stalls.  Just like
  892    x86_partial_reg_stall, this option might be considered for Generic32
  893    if our scheme for avoiding partial stalls were more effective.  */
894 const int x86_himode_math = ~(m_PPRO);
895 const int x86_promote_hi_regs = m_PPRO;
896 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
897 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
898 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
899 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
900 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
901 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
902 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
903 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
904 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
905 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
906 const int x86_shift1 = ~m_486;
907 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
  908 /* In the generic model we have a conflict here between PPro/Pentium4 based
  909    chips that treat 128bit SSE registers as single units and K8 based chips
  910    that split SSE registers into two 64bit halves.
  911    x86_sse_partial_reg_dependency promotes all store destinations to 128bit
  912    to allow register renaming on 128bit SSE units, but this usually results in
  913    one extra micro-op on 64bit SSE units.  Experimental results show that
  914    disabling this option on P4 brings over 20% SPECfp regression, while
  915    enabling it on K8 brings roughly a 2.4% regression that can be partly
  916    masked by careful scheduling of moves.  */
917 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
  918 /* Set for machines where types and dependencies are resolved on SSE
  919    register parts instead of whole registers, so we may maintain just the
  920    lower part of scalar values in the proper format, leaving the upper part
  921    undefined.  */
922 const int x86_sse_split_regs = m_ATHLON_K8;
923 const int x86_sse_typeless_stores = m_ATHLON_K8;
924 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
925 const int x86_use_ffreep = m_ATHLON_K8;
926 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
927 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
928
  929 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
  930    integer data in xmm registers, which results in pretty abysmal code.  */
931 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
932
933 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
  934 /* Some CPU cores are not able to predict more than 4 branch instructions in
  935    a 16-byte window.  */
936 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
937 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
938 const int x86_use_bt = m_ATHLON_K8;
939 /* Compare and exchange was added for 80486. */
940 const int x86_cmpxchg = ~m_386;
  941 /* Compare and exchange 8 bytes was added for Pentium.  */
942 const int x86_cmpxchg8b = ~(m_386 | m_486);
  943 /* Compare and exchange 16 bytes was added for Nocona.  */
944 const int x86_cmpxchg16b = m_NOCONA;
945 /* Exchange and add was added for 80486. */
946 const int x86_xadd = ~m_386;
947 /* Byteswap was added for 80486. */
948 const int x86_bswap = ~m_386;
949 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
950
  951 /* If the average insn count for a single function invocation is
  952    lower than this constant, emit fast (but longer) prologue and
  953    epilogue code.  */
954 #define FAST_PROLOGUE_INSN_COUNT 20
955
  956 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
957 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
958 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
959 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
960
961 /* Array of the smallest class containing reg number REGNO, indexed by
962 REGNO. Used by REGNO_REG_CLASS in i386.h. */
963
964 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
965 {
966 /* ax, dx, cx, bx */
967 AREG, DREG, CREG, BREG,
968 /* si, di, bp, sp */
969 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
970 /* FP registers */
971 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
972 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
973 /* arg pointer */
974 NON_Q_REGS,
975 /* flags, fpsr, fpcr, dirflag, frame */
976 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
977 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
978 SSE_REGS, SSE_REGS,
979 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
980 MMX_REGS, MMX_REGS,
981 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
982 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
983 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
984 SSE_REGS, SSE_REGS,
985 };
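/* Illustrative note: i386.h is assumed to consult this table roughly as
     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
   so, for instance, REGNO_REG_CLASS (0) yields AREG for %eax.  */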
986
987 /* The "default" register map used in 32bit mode. */
988
989 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
990 {
991 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
992 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
993 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
994 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
995 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
996 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
997 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
998 };
999
1000 static int const x86_64_int_parameter_registers[6] =
1001 {
1002 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1003 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1004 };
1005
1006 static int const x86_64_int_return_registers[4] =
1007 {
  1008   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1009 };
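/* Worked example from the tables above: the first integer argument uses
   hard register x86_64_int_parameter_registers[0] == 5, i.e. %rdi, while
   the first integer return value uses x86_64_int_return_registers[0] == 0,
   i.e. %rax.  */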
1010
1011 /* The "default" register map used in 64bit mode. */
1012 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1013 {
1014 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1015 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1016 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1017 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1018 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1019 8,9,10,11,12,13,14,15, /* extended integer registers */
1020 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1021 };
1022
1023 /* Define the register numbers to be used in Dwarf debugging information.
1024 The SVR4 reference port C compiler uses the following register numbers
1025 in its Dwarf output code:
1026 0 for %eax (gcc regno = 0)
1027 1 for %ecx (gcc regno = 2)
1028 2 for %edx (gcc regno = 1)
1029 3 for %ebx (gcc regno = 3)
1030 4 for %esp (gcc regno = 7)
1031 5 for %ebp (gcc regno = 6)
1032 6 for %esi (gcc regno = 4)
1033 7 for %edi (gcc regno = 5)
1034 The following three DWARF register numbers are never generated by
1035 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1036 believes these numbers have these meanings.
1037 8 for %eip (no gcc equivalent)
1038 9 for %eflags (gcc regno = 17)
1039 10 for %trapno (no gcc equivalent)
1040 It is not at all clear how we should number the FP stack registers
1041 for the x86 architecture. If the version of SDB on x86/svr4 were
1042 a bit less brain dead with respect to floating-point then we would
1043 have a precedent to follow with respect to DWARF register numbers
1044 for x86 FP registers, but the SDB on x86/svr4 is so completely
1045 broken with respect to FP registers that it is hardly worth thinking
1046 of it as something to strive for compatibility with.
1047 The version of x86/svr4 SDB I have at the moment does (partially)
1048 seem to believe that DWARF register number 11 is associated with
1049 the x86 register %st(0), but that's about all. Higher DWARF
1050 register numbers don't seem to be associated with anything in
1051 particular, and even for DWARF regno 11, SDB only seems to under-
1052 stand that it should say that a variable lives in %st(0) (when
1053 asked via an `=' command) if we said it was in DWARF regno 11,
1054 but SDB still prints garbage when asked for the value of the
1055 variable in question (via a `/' command).
1056 (Also note that the labels SDB prints for various FP stack regs
1057 when doing an `x' command are all wrong.)
1058 Note that these problems generally don't affect the native SVR4
1059 C compiler because it doesn't allow the use of -O with -g and
1060 because when it is *not* optimizing, it allocates a memory
1061 location for each floating-point variable, and the memory
1062 location is what gets described in the DWARF AT_location
1063 attribute for the variable in question.
1064 Regardless of the severe mental illness of the x86/svr4 SDB, we
1065 do something sensible here and we use the following DWARF
1066 register numbers. Note that these are all stack-top-relative
1067 numbers.
1068 11 for %st(0) (gcc regno = 8)
1069 12 for %st(1) (gcc regno = 9)
1070 13 for %st(2) (gcc regno = 10)
1071 14 for %st(3) (gcc regno = 11)
1072 15 for %st(4) (gcc regno = 12)
1073 16 for %st(5) (gcc regno = 13)
1074 17 for %st(6) (gcc regno = 14)
1075 18 for %st(7) (gcc regno = 15)
1076 */
1077 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1078 {
1079 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1080 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1081 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1082 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1083 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1084 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1085 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1086 };
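/* Worked example: svr4_dbx_register_map[7] == 4, matching the "4 for %esp
   (gcc regno = 7)" entry in the comment above.  The subtarget's
   DBX_REGISTER_NUMBER macro is assumed to index one of these maps with the
   gcc register number to obtain the Dwarf register number.  */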
1087
1088 /* Test and compare insns in i386.md store the information needed to
1089 generate branch and scc insns here. */
1090
1091 rtx ix86_compare_op0 = NULL_RTX;
1092 rtx ix86_compare_op1 = NULL_RTX;
1093 rtx ix86_compare_emitted = NULL_RTX;
1094
1095 /* Size of the register save area. */
1096 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
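/* Assuming the usual 64-bit values REGPARM_MAX == 6, UNITS_PER_WORD == 8
   and SSE_REGPARM_MAX == 8, this works out to 6*8 + 8*16 = 176 bytes.  */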
1097
1098 /* Define the structure for the machine field in struct function. */
1099
1100 struct stack_local_entry GTY(())
1101 {
1102 unsigned short mode;
1103 unsigned short n;
1104 rtx rtl;
1105 struct stack_local_entry *next;
1106 };
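/* Illustrative sketch of how such a list can be searched for an already
   allocated slot; the real lookup is done by assign_386_stack_local
   elsewhere in this file, and the list head is passed in explicitly here
   only to keep the non-compiled example self-contained.  */
#if 0
static rtx
lookup_stack_local_example (struct stack_local_entry *list,
			    enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  for (s = list; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;
  return NULL_RTX;
}
#endif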
1107
1108 /* Structure describing stack frame layout.
1109 Stack grows downward:
1110
1111 [arguments]
1112 <- ARG_POINTER
1113 saved pc
1114
1115 saved frame pointer if frame_pointer_needed
1116 <- HARD_FRAME_POINTER
1117 [saved regs]
1118
1119 [padding1] \
1120 )
1121 [va_arg registers] (
1122 > to_allocate <- FRAME_POINTER
1123 [frame] (
1124 )
1125 [padding2] /
1126 */
1127 struct ix86_frame
1128 {
1129 int nregs;
1130 int padding1;
1131 int va_arg_size;
1132 HOST_WIDE_INT frame;
1133 int padding2;
1134 int outgoing_arguments_size;
1135 int red_zone_size;
1136
1137 HOST_WIDE_INT to_allocate;
1138 /* The offsets relative to ARG_POINTER. */
1139 HOST_WIDE_INT frame_pointer_offset;
1140 HOST_WIDE_INT hard_frame_pointer_offset;
1141 HOST_WIDE_INT stack_pointer_offset;
1142
1143 /* When save_regs_using_mov is set, emit prologue using
1144 move instead of push instructions. */
1145 bool save_regs_using_mov;
1146 };
1147
1148 /* Code model option. */
1149 enum cmodel ix86_cmodel;
1150 /* Asm dialect. */
1151 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1152 /* TLS dialects. */
1153 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1154
1155 /* Which unit we are generating floating point math for. */
1156 enum fpmath_unit ix86_fpmath;
1157
  1158 /* Which CPU we are scheduling for.  */
1159 enum processor_type ix86_tune;
1160 /* Which instruction set architecture to use. */
1161 enum processor_type ix86_arch;
1162
  1163 /* True if the SSE prefetch instruction is not a NOP.  */
1164 int x86_prefetch_sse;
1165
1166 /* ix86_regparm_string as a number */
1167 static int ix86_regparm;
1168
1169 /* -mstackrealign option */
1170 extern int ix86_force_align_arg_pointer;
1171 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1172
1173 /* Preferred alignment for stack boundary in bits. */
1174 unsigned int ix86_preferred_stack_boundary;
1175
1176 /* Values 1-5: see jump.c */
1177 int ix86_branch_cost;
1178
1179 /* Variables which are this size or smaller are put in the data/bss
1180 or ldata/lbss sections. */
1181
1182 int ix86_section_threshold = 65536;
1183
1184 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1185 char internal_label_prefix[16];
1186 int internal_label_prefix_len;
1187 \f
1188 static bool ix86_handle_option (size_t, const char *, int);
1189 static void output_pic_addr_const (FILE *, rtx, int);
1190 static void put_condition_code (enum rtx_code, enum machine_mode,
1191 int, int, FILE *);
1192 static const char *get_some_local_dynamic_name (void);
1193 static int get_some_local_dynamic_name_1 (rtx *, void *);
1194 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1195 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1196 rtx *);
1197 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1198 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1199 enum machine_mode);
1200 static rtx get_thread_pointer (int);
1201 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1202 static void get_pc_thunk_name (char [32], unsigned int);
1203 static rtx gen_push (rtx);
1204 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1205 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1206 static struct machine_function * ix86_init_machine_status (void);
1207 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1208 static int ix86_nsaved_regs (void);
1209 static void ix86_emit_save_regs (void);
1210 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1211 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1212 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1213 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1214 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1215 static rtx ix86_expand_aligntest (rtx, int);
1216 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1217 static int ix86_issue_rate (void);
1218 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1219 static int ia32_multipass_dfa_lookahead (void);
1220 static void ix86_init_mmx_sse_builtins (void);
1221 static rtx x86_this_parameter (tree);
1222 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1223 HOST_WIDE_INT, tree);
1224 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1225 static void x86_file_start (void);
1226 static void ix86_reorg (void);
1227 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1228 static tree ix86_build_builtin_va_list (void);
1229 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1230 tree, int *, int);
1231 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1232 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1233 static bool ix86_vector_mode_supported_p (enum machine_mode);
1234
1235 static int ix86_address_cost (rtx);
1236 static bool ix86_cannot_force_const_mem (rtx);
1237 static rtx ix86_delegitimize_address (rtx);
1238
1239 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1240
1241 struct builtin_description;
1242 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1243 tree, rtx);
1244 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1245 tree, rtx);
1246 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1247 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1248 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1249 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1250 static rtx safe_vector_operand (rtx, enum machine_mode);
1251 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1252 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1253 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1254 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1255 static int ix86_fp_comparison_cost (enum rtx_code code);
1256 static unsigned int ix86_select_alt_pic_regnum (void);
1257 static int ix86_save_reg (unsigned int, int);
1258 static void ix86_compute_frame_layout (struct ix86_frame *);
1259 static int ix86_comp_type_attributes (tree, tree);
1260 static int ix86_function_regparm (tree, tree);
1261 const struct attribute_spec ix86_attribute_table[];
1262 static bool ix86_function_ok_for_sibcall (tree, tree);
1263 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1264 static int ix86_value_regno (enum machine_mode, tree, tree);
1265 static bool contains_128bit_aligned_vector_p (tree);
1266 static rtx ix86_struct_value_rtx (tree, int);
1267 static bool ix86_ms_bitfield_layout_p (tree);
1268 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1269 static int extended_reg_mentioned_1 (rtx *, void *);
1270 static bool ix86_rtx_costs (rtx, int, int, int *);
1271 static int min_insn_size (rtx);
1272 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1273 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1274 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1275 tree, bool);
1276 static void ix86_init_builtins (void);
1277 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1278 static const char *ix86_mangle_fundamental_type (tree);
1279 static tree ix86_stack_protect_fail (void);
1280 static rtx ix86_internal_arg_pointer (void);
1281 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1282
1283 /* This function is only used on Solaris. */
1284 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1285 ATTRIBUTE_UNUSED;
1286
  1287 /* Register class used for passing a given 64bit part of the argument.
  1288    These represent classes as documented by the psABI, with the exception of
  1289    the SSESF and SSEDF classes, which are basically the SSE class, except that
  1290    gcc will use an SFmode or DFmode move instead of DImode to avoid
  1291    reformatting penalties.
  1292    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
  1293    whenever possible (the upper half is then just padding).
  1294  */
1295 enum x86_64_reg_class
1296 {
1297 X86_64_NO_CLASS,
1298 X86_64_INTEGER_CLASS,
1299 X86_64_INTEGERSI_CLASS,
1300 X86_64_SSE_CLASS,
1301 X86_64_SSESF_CLASS,
1302 X86_64_SSEDF_CLASS,
1303 X86_64_SSEUP_CLASS,
1304 X86_64_X87_CLASS,
1305 X86_64_X87UP_CLASS,
1306 X86_64_COMPLEX_X87_CLASS,
1307 X86_64_MEMORY_CLASS
1308 };
1309 static const char * const x86_64_reg_class_name[] = {
1310 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1311 "sseup", "x87", "x87up", "cplx87", "no"
1312 };
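/* Worked example of the classification above: a 16-byte struct such as
   { double d; int i; } is assumed to classify its first eightbyte as
   X86_64_SSEDF_CLASS and its second as X86_64_INTEGERSI_CLASS, so the
   double travels in an SSE register and the int in a general register,
   with the upper half of the second eightbyte being padding.  */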
1313
1314 #define MAX_CLASSES 4
1315
1316 /* Table of constants used by fldpi, fldln2, etc.... */
1317 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1318 static bool ext_80387_constants_init = 0;
1319 static void init_ext_80387_constants (void);
1320 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1321 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1322 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1323 static section *x86_64_elf_select_section (tree decl, int reloc,
1324 unsigned HOST_WIDE_INT align)
1325 ATTRIBUTE_UNUSED;
1326 \f
1327 /* Initialize the GCC target structure. */
1328 #undef TARGET_ATTRIBUTE_TABLE
1329 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1331 # undef TARGET_MERGE_DECL_ATTRIBUTES
1332 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1333 #endif
1334
1335 #undef TARGET_COMP_TYPE_ATTRIBUTES
1336 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1337
1338 #undef TARGET_INIT_BUILTINS
1339 #define TARGET_INIT_BUILTINS ix86_init_builtins
1340 #undef TARGET_EXPAND_BUILTIN
1341 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1342
1343 #undef TARGET_ASM_FUNCTION_EPILOGUE
1344 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1345
1346 #undef TARGET_ENCODE_SECTION_INFO
1347 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1348 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1349 #else
1350 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1351 #endif
1352
1353 #undef TARGET_ASM_OPEN_PAREN
1354 #define TARGET_ASM_OPEN_PAREN ""
1355 #undef TARGET_ASM_CLOSE_PAREN
1356 #define TARGET_ASM_CLOSE_PAREN ""
1357
1358 #undef TARGET_ASM_ALIGNED_HI_OP
1359 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1360 #undef TARGET_ASM_ALIGNED_SI_OP
1361 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1362 #ifdef ASM_QUAD
1363 #undef TARGET_ASM_ALIGNED_DI_OP
1364 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1365 #endif
1366
1367 #undef TARGET_ASM_UNALIGNED_HI_OP
1368 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1369 #undef TARGET_ASM_UNALIGNED_SI_OP
1370 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1371 #undef TARGET_ASM_UNALIGNED_DI_OP
1372 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1373
1374 #undef TARGET_SCHED_ADJUST_COST
1375 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1376 #undef TARGET_SCHED_ISSUE_RATE
1377 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1378 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1379 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1380 ia32_multipass_dfa_lookahead
1381
1382 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1383 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1384
1385 #ifdef HAVE_AS_TLS
1386 #undef TARGET_HAVE_TLS
1387 #define TARGET_HAVE_TLS true
1388 #endif
1389 #undef TARGET_CANNOT_FORCE_CONST_MEM
1390 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1391 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1392 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1393
1394 #undef TARGET_DELEGITIMIZE_ADDRESS
1395 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1396
1397 #undef TARGET_MS_BITFIELD_LAYOUT_P
1398 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1399
1400 #if TARGET_MACHO
1401 #undef TARGET_BINDS_LOCAL_P
1402 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1403 #endif
1404
1405 #undef TARGET_ASM_OUTPUT_MI_THUNK
1406 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1407 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1408 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1409
1410 #undef TARGET_ASM_FILE_START
1411 #define TARGET_ASM_FILE_START x86_file_start
1412
1413 #undef TARGET_DEFAULT_TARGET_FLAGS
1414 #define TARGET_DEFAULT_TARGET_FLAGS \
1415 (TARGET_DEFAULT \
1416 | TARGET_64BIT_DEFAULT \
1417 | TARGET_SUBTARGET_DEFAULT \
1418 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1419
1420 #undef TARGET_HANDLE_OPTION
1421 #define TARGET_HANDLE_OPTION ix86_handle_option
1422
1423 #undef TARGET_RTX_COSTS
1424 #define TARGET_RTX_COSTS ix86_rtx_costs
1425 #undef TARGET_ADDRESS_COST
1426 #define TARGET_ADDRESS_COST ix86_address_cost
1427
1428 #undef TARGET_FIXED_CONDITION_CODE_REGS
1429 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1430 #undef TARGET_CC_MODES_COMPATIBLE
1431 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1432
1433 #undef TARGET_MACHINE_DEPENDENT_REORG
1434 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1435
1436 #undef TARGET_BUILD_BUILTIN_VA_LIST
1437 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1438
1439 #undef TARGET_MD_ASM_CLOBBERS
1440 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1441
1442 #undef TARGET_PROMOTE_PROTOTYPES
1443 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1444 #undef TARGET_STRUCT_VALUE_RTX
1445 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1446 #undef TARGET_SETUP_INCOMING_VARARGS
1447 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1448 #undef TARGET_MUST_PASS_IN_STACK
1449 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1450 #undef TARGET_PASS_BY_REFERENCE
1451 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1452 #undef TARGET_INTERNAL_ARG_POINTER
1453 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1454 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1455 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1456
1457 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1458 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1459
1460 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1461 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1462
1463 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1464 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1465
1466 #ifdef HAVE_AS_TLS
1467 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1468 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1469 #endif
1470
1471 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1472 #undef TARGET_INSERT_ATTRIBUTES
1473 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1474 #endif
1475
1476 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1477 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1478
1479 #undef TARGET_STACK_PROTECT_FAIL
1480 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1481
1482 #undef TARGET_FUNCTION_VALUE
1483 #define TARGET_FUNCTION_VALUE ix86_function_value
1484
1485 struct gcc_target targetm = TARGET_INITIALIZER;
1486
1487 \f
1488 /* The svr4 ABI for the i386 says that records and unions are returned
1489 in memory. */
1490 #ifndef DEFAULT_PCC_STRUCT_RETURN
1491 #define DEFAULT_PCC_STRUCT_RETURN 1
1492 #endif
1493
1494 /* Implement TARGET_HANDLE_OPTION. */
1495
1496 static bool
1497 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1498 {
1499 switch (code)
1500 {
1501 case OPT_m3dnow:
1502 if (!value)
1503 {
1504 target_flags &= ~MASK_3DNOW_A;
1505 target_flags_explicit |= MASK_3DNOW_A;
1506 }
1507 return true;
1508
1509 case OPT_mmmx:
1510 if (!value)
1511 {
1512 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1513 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1514 }
1515 return true;
1516
1517 case OPT_msse:
1518 if (!value)
1519 {
1520 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1521 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1522 }
1523 return true;
1524
1525 case OPT_msse2:
1526 if (!value)
1527 {
1528 target_flags &= ~MASK_SSE3;
1529 target_flags_explicit |= MASK_SSE3;
1530 }
1531 return true;
1532
1533 default:
1534 return true;
1535 }
1536 }
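
/* For illustration (a hedged example, not from the original sources; the
   file name is hypothetical): because the handlers above also record the
   implied masks in target_flags_explicit, a command line such as

       gcc -march=pentium4 -mno-sse -S foo.c

   ends up with SSE, SSE2 and SSE3 all disabled: -mno-sse clears MASK_SSE2
   and MASK_SSE3 here and marks them as explicitly set, so the PTA_SSE*
   handling in override_options below does not turn them back on even though
   -march=pentium4 would normally imply them.  */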
1537
1538 /* Sometimes certain combinations of command options do not make
1539 sense on a particular target machine. You can define a macro
1540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1541 defined, is executed once just after all the command options have
1542 been parsed.
1543
1544 Don't use this macro to turn on various extra optimizations for
1545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1546
1547 void
1548 override_options (void)
1549 {
1550 int i;
1551 int ix86_tune_defaulted = 0;
1552
1553 /* Comes from final.c -- no real reason to change it. */
1554 #define MAX_CODE_ALIGN 16
1555
1556 static struct ptt
1557 {
1558 const struct processor_costs *cost; /* Processor costs */
1559 const int target_enable; /* Target flags to enable. */
1560 const int target_disable; /* Target flags to disable. */
1561 const int align_loop; /* Default alignments. */
1562 const int align_loop_max_skip;
1563 const int align_jump;
1564 const int align_jump_max_skip;
1565 const int align_func;
1566 }
1567 const processor_target_table[PROCESSOR_max] =
1568 {
1569 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1570 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1571 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1572 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1573 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1574 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1575 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1576 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1577 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1578 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1579 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1580 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1581 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1582 };
1583
1584 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1585 static struct pta
1586 {
1587 const char *const name; /* processor name or nickname. */
1588 const enum processor_type processor;
1589 const enum pta_flags
1590 {
1591 PTA_SSE = 1,
1592 PTA_SSE2 = 2,
1593 PTA_SSE3 = 4,
1594 PTA_MMX = 8,
1595 PTA_PREFETCH_SSE = 16,
1596 PTA_3DNOW = 32,
1597 PTA_3DNOW_A = 64,
1598 PTA_64BIT = 128,
1599 PTA_SSSE3 = 256
1600 } flags;
1601 }
1602 const processor_alias_table[] =
1603 {
1604 {"i386", PROCESSOR_I386, 0},
1605 {"i486", PROCESSOR_I486, 0},
1606 {"i586", PROCESSOR_PENTIUM, 0},
1607 {"pentium", PROCESSOR_PENTIUM, 0},
1608 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1609 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1610 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1611 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1612 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1613 {"i686", PROCESSOR_PENTIUMPRO, 0},
1614 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1615 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1616 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1617 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1618 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1619 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1620 | PTA_MMX | PTA_PREFETCH_SSE},
1621 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1622 | PTA_MMX | PTA_PREFETCH_SSE},
1623 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1624 | PTA_MMX | PTA_PREFETCH_SSE},
1625 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1626 | PTA_MMX | PTA_PREFETCH_SSE},
1627 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
1628 | PTA_64BIT | PTA_MMX
1629 | PTA_PREFETCH_SSE},
1630 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1631 | PTA_3DNOW_A},
1632 {"k6", PROCESSOR_K6, PTA_MMX},
1633 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1634 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1635 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1636 | PTA_3DNOW_A},
1637 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1638 | PTA_3DNOW | PTA_3DNOW_A},
1639 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1640 | PTA_3DNOW_A | PTA_SSE},
1641 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1642 | PTA_3DNOW_A | PTA_SSE},
1643 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1644 | PTA_3DNOW_A | PTA_SSE},
1645 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1646 | PTA_SSE | PTA_SSE2 },
1647 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1648 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1649 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1650 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1651 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1652 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1653 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1654 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1655 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1656 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1657 };
1658
1659 int const pta_size = ARRAY_SIZE (processor_alias_table);
1660
1661 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1662 SUBTARGET_OVERRIDE_OPTIONS;
1663 #endif
1664
1665 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1666 SUBSUBTARGET_OVERRIDE_OPTIONS;
1667 #endif
1668
1669 /* -fPIC is the default for 64-bit Mach-O. */
1670 if (TARGET_MACHO && TARGET_64BIT)
1671 flag_pic = 2;
1672
1673 /* Set the default values for switches whose default depends on TARGET_64BIT
1674 in case they weren't overwritten by command line options. */
1675 if (TARGET_64BIT)
1676 {
1677 /* Mach-O doesn't support omitting the frame pointer for now. */
1678 if (flag_omit_frame_pointer == 2)
1679 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1680 if (flag_asynchronous_unwind_tables == 2)
1681 flag_asynchronous_unwind_tables = 1;
1682 if (flag_pcc_struct_return == 2)
1683 flag_pcc_struct_return = 0;
1684 }
1685 else
1686 {
1687 if (flag_omit_frame_pointer == 2)
1688 flag_omit_frame_pointer = 0;
1689 if (flag_asynchronous_unwind_tables == 2)
1690 flag_asynchronous_unwind_tables = 0;
1691 if (flag_pcc_struct_return == 2)
1692 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1693 }
1694
1695 /* Need to check -mtune=generic first. */
1696 if (ix86_tune_string)
1697 {
1698 if (!strcmp (ix86_tune_string, "generic")
1699 || !strcmp (ix86_tune_string, "i686")
1700 /* As special support for cross compilers we read -mtune=native
1701 as -mtune=generic.  With native compilers we won't see
1702 -mtune=native, as it will have been changed by the driver. */
1703 || !strcmp (ix86_tune_string, "native"))
1704 {
1705 if (TARGET_64BIT)
1706 ix86_tune_string = "generic64";
1707 else
1708 ix86_tune_string = "generic32";
1709 }
1710 else if (!strncmp (ix86_tune_string, "generic", 7))
1711 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1712 }
1713 else
1714 {
1715 if (ix86_arch_string)
1716 ix86_tune_string = ix86_arch_string;
1717 if (!ix86_tune_string)
1718 {
1719 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1720 ix86_tune_defaulted = 1;
1721 }
1722
1723 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1724 need to use a sensible tune option. */
1725 if (!strcmp (ix86_tune_string, "generic")
1726 || !strcmp (ix86_tune_string, "x86-64")
1727 || !strcmp (ix86_tune_string, "i686"))
1728 {
1729 if (TARGET_64BIT)
1730 ix86_tune_string = "generic64";
1731 else
1732 ix86_tune_string = "generic32";
1733 }
1734 }
1735 if (!strcmp (ix86_tune_string, "x86-64"))
1736 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1737 "-mtune=generic instead as appropriate.");
1738
1739 if (!ix86_arch_string)
1740 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1741 if (!strcmp (ix86_arch_string, "generic"))
1742 error ("generic CPU can be used only for -mtune= switch");
1743 if (!strncmp (ix86_arch_string, "generic", 7))
1744 error ("bad value (%s) for -march= switch", ix86_arch_string);
1745
1746 if (ix86_cmodel_string != 0)
1747 {
1748 if (!strcmp (ix86_cmodel_string, "small"))
1749 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1750 else if (!strcmp (ix86_cmodel_string, "medium"))
1751 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1752 else if (flag_pic)
1753 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1754 else if (!strcmp (ix86_cmodel_string, "32"))
1755 ix86_cmodel = CM_32;
1756 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1757 ix86_cmodel = CM_KERNEL;
1758 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1759 ix86_cmodel = CM_LARGE;
1760 else
1761 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1762 }
1763 else
1764 {
1765 ix86_cmodel = CM_32;
1766 if (TARGET_64BIT)
1767 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1768 }
1769 if (ix86_asm_string != 0)
1770 {
1771 if (! TARGET_MACHO
1772 && !strcmp (ix86_asm_string, "intel"))
1773 ix86_asm_dialect = ASM_INTEL;
1774 else if (!strcmp (ix86_asm_string, "att"))
1775 ix86_asm_dialect = ASM_ATT;
1776 else
1777 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1778 }
1779 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1780 error ("code model %qs not supported in the %s bit mode",
1781 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1782 if (ix86_cmodel == CM_LARGE)
1783 sorry ("code model %<large%> not supported yet");
1784 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1785 sorry ("%i-bit mode not compiled in",
1786 (target_flags & MASK_64BIT) ? 64 : 32);
1787
1788 for (i = 0; i < pta_size; i++)
1789 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1790 {
1791 ix86_arch = processor_alias_table[i].processor;
1792 /* Default cpu tuning to the architecture. */
1793 ix86_tune = ix86_arch;
1794 if (processor_alias_table[i].flags & PTA_MMX
1795 && !(target_flags_explicit & MASK_MMX))
1796 target_flags |= MASK_MMX;
1797 if (processor_alias_table[i].flags & PTA_3DNOW
1798 && !(target_flags_explicit & MASK_3DNOW))
1799 target_flags |= MASK_3DNOW;
1800 if (processor_alias_table[i].flags & PTA_3DNOW_A
1801 && !(target_flags_explicit & MASK_3DNOW_A))
1802 target_flags |= MASK_3DNOW_A;
1803 if (processor_alias_table[i].flags & PTA_SSE
1804 && !(target_flags_explicit & MASK_SSE))
1805 target_flags |= MASK_SSE;
1806 if (processor_alias_table[i].flags & PTA_SSE2
1807 && !(target_flags_explicit & MASK_SSE2))
1808 target_flags |= MASK_SSE2;
1809 if (processor_alias_table[i].flags & PTA_SSE3
1810 && !(target_flags_explicit & MASK_SSE3))
1811 target_flags |= MASK_SSE3;
1812 if (processor_alias_table[i].flags & PTA_SSSE3
1813 && !(target_flags_explicit & MASK_SSSE3))
1814 target_flags |= MASK_SSSE3;
1815 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1816 x86_prefetch_sse = true;
1817 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1818 error ("CPU you selected does not support x86-64 "
1819 "instruction set");
1820 break;
1821 }
1822
1823 if (i == pta_size)
1824 error ("bad value (%s) for -march= switch", ix86_arch_string);
1825
1826 for (i = 0; i < pta_size; i++)
1827 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1828 {
1829 ix86_tune = processor_alias_table[i].processor;
1830 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1831 {
1832 if (ix86_tune_defaulted)
1833 {
1834 ix86_tune_string = "x86-64";
1835 for (i = 0; i < pta_size; i++)
1836 if (! strcmp (ix86_tune_string,
1837 processor_alias_table[i].name))
1838 break;
1839 ix86_tune = processor_alias_table[i].processor;
1840 }
1841 else
1842 error ("CPU you selected does not support x86-64 "
1843 "instruction set");
1844 }
1845 /* Intel CPUs have always interpreted SSE prefetch instructions as
1846 NOPs; so, we can enable SSE prefetch instructions even when
1847 -mtune (rather than -march) points us to a processor that has them.
1848 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1849 higher processors. */
1850 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1851 x86_prefetch_sse = true;
1852 break;
1853 }
1854 if (i == pta_size)
1855 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1856
1857 if (optimize_size)
1858 ix86_cost = &size_cost;
1859 else
1860 ix86_cost = processor_target_table[ix86_tune].cost;
1861 target_flags |= processor_target_table[ix86_tune].target_enable;
1862 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1863
1864 /* Arrange to set up i386_stack_locals for all functions. */
1865 init_machine_status = ix86_init_machine_status;
1866
1867 /* Validate -mregparm= value. */
1868 if (ix86_regparm_string)
1869 {
1870 i = atoi (ix86_regparm_string);
1871 if (i < 0 || i > REGPARM_MAX)
1872 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1873 else
1874 ix86_regparm = i;
1875 }
1876 else
1877 if (TARGET_64BIT)
1878 ix86_regparm = REGPARM_MAX;
1879
1880 /* If the user has provided any of the -malign-* options,
1881 warn and use that value only if -falign-* is not set.
1882 Remove this code in GCC 3.2 or later. */
1883 if (ix86_align_loops_string)
1884 {
1885 warning (0, "-malign-loops is obsolete, use -falign-loops");
1886 if (align_loops == 0)
1887 {
1888 i = atoi (ix86_align_loops_string);
1889 if (i < 0 || i > MAX_CODE_ALIGN)
1890 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1891 else
1892 align_loops = 1 << i;
1893 }
1894 }
1895
1896 if (ix86_align_jumps_string)
1897 {
1898 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1899 if (align_jumps == 0)
1900 {
1901 i = atoi (ix86_align_jumps_string);
1902 if (i < 0 || i > MAX_CODE_ALIGN)
1903 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1904 else
1905 align_jumps = 1 << i;
1906 }
1907 }
1908
1909 if (ix86_align_funcs_string)
1910 {
1911 warning (0, "-malign-functions is obsolete, use -falign-functions");
1912 if (align_functions == 0)
1913 {
1914 i = atoi (ix86_align_funcs_string);
1915 if (i < 0 || i > MAX_CODE_ALIGN)
1916 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1917 else
1918 align_functions = 1 << i;
1919 }
1920 }
1921
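/* A worked example of the conversion above (illustrative only): the obsolete
   options take a log2 value, so -malign-functions=4 yields
   align_functions = 1 << 4 = 16 bytes, which corresponds to the modern
   -falign-functions=16 that takes the byte count directly.  */
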
1922 /* Default align_* from the processor table. */
1923 if (align_loops == 0)
1924 {
1925 align_loops = processor_target_table[ix86_tune].align_loop;
1926 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1927 }
1928 if (align_jumps == 0)
1929 {
1930 align_jumps = processor_target_table[ix86_tune].align_jump;
1931 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1932 }
1933 if (align_functions == 0)
1934 {
1935 align_functions = processor_target_table[ix86_tune].align_func;
1936 }
1937
1938 /* Validate -mbranch-cost= value, or provide default. */
1939 ix86_branch_cost = ix86_cost->branch_cost;
1940 if (ix86_branch_cost_string)
1941 {
1942 i = atoi (ix86_branch_cost_string);
1943 if (i < 0 || i > 5)
1944 error ("-mbranch-cost=%d is not between 0 and 5", i);
1945 else
1946 ix86_branch_cost = i;
1947 }
1948 if (ix86_section_threshold_string)
1949 {
1950 i = atoi (ix86_section_threshold_string);
1951 if (i < 0)
1952 error ("-mlarge-data-threshold=%d is negative", i);
1953 else
1954 ix86_section_threshold = i;
1955 }
1956
1957 if (ix86_tls_dialect_string)
1958 {
1959 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1960 ix86_tls_dialect = TLS_DIALECT_GNU;
1961 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1962 ix86_tls_dialect = TLS_DIALECT_GNU2;
1963 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1964 ix86_tls_dialect = TLS_DIALECT_SUN;
1965 else
1966 error ("bad value (%s) for -mtls-dialect= switch",
1967 ix86_tls_dialect_string);
1968 }
1969
1970 /* Keep nonleaf frame pointers. */
1971 if (flag_omit_frame_pointer)
1972 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1973 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1974 flag_omit_frame_pointer = 1;
1975
1976 /* If we're doing fast math, we don't care about comparison order
1977 wrt NaNs. This lets us use a shorter comparison sequence. */
1978 if (flag_finite_math_only)
1979 target_flags &= ~MASK_IEEE_FP;
1980
1981 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1982 since the insns won't need emulation. */
1983 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1984 target_flags &= ~MASK_NO_FANCY_MATH_387;
1985
1986 /* Likewise, if the target doesn't have a 387, or we've specified
1987 software floating point, don't use 387 inline intrinsics. */
1988 if (!TARGET_80387)
1989 target_flags |= MASK_NO_FANCY_MATH_387;
1990
1991 /* Turn on SSE3 builtins for -mssse3. */
1992 if (TARGET_SSSE3)
1993 target_flags |= MASK_SSE3;
1994
1995 /* Turn on SSE2 builtins for -msse3. */
1996 if (TARGET_SSE3)
1997 target_flags |= MASK_SSE2;
1998
1999 /* Turn on SSE builtins for -msse2. */
2000 if (TARGET_SSE2)
2001 target_flags |= MASK_SSE;
2002
2003 /* Turn on MMX builtins for -msse. */
2004 if (TARGET_SSE)
2005 {
2006 target_flags |= MASK_MMX & ~target_flags_explicit;
2007 x86_prefetch_sse = true;
2008 }
2009
2010 /* Turn on MMX builtins for -m3dnow. */
2011 if (TARGET_3DNOW)
2012 target_flags |= MASK_MMX;
2013
2014 if (TARGET_64BIT)
2015 {
2016 if (TARGET_ALIGN_DOUBLE)
2017 error ("-malign-double makes no sense in the 64bit mode");
2018 if (TARGET_RTD)
2019 error ("-mrtd calling convention not supported in the 64bit mode");
2020
2021 /* Enable by default the SSE and MMX builtins. Do allow the user to
2022 explicitly disable any of these. In particular, disabling SSE and
2023 MMX for kernel code is extremely useful. */
2024 target_flags
2025 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2026 & ~target_flags_explicit);
2027 }
2028 else
2029 {
2030 /* The i386 ABI does not specify a red zone.  It still makes sense to use
2031 one when the programmer takes care to keep the stack from being clobbered. */
2032 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2033 target_flags |= MASK_NO_RED_ZONE;
2034 }
2035
2036 /* Validate -mpreferred-stack-boundary= value, or provide default.
2037 The default of 128 bits is for the Pentium III's SSE __m128.  We can't
2038 lower it even for optimize_size, since otherwise we couldn't mix object
2039 files compiled with -Os and -On. */
2040 ix86_preferred_stack_boundary = 128;
2041 if (ix86_preferred_stack_boundary_string)
2042 {
2043 i = atoi (ix86_preferred_stack_boundary_string);
2044 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2045 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2046 TARGET_64BIT ? 4 : 2);
2047 else
2048 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2049 }
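
/* Worked example (illustrative only): -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack alignment.
   The accepted range is 2..12 in 32-bit mode and 4..12 in 64-bit mode.  */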
2050
2051 /* Accept -mx87regparm only if 80387 support is enabled. */
2052 if (TARGET_X87REGPARM
2053 && ! TARGET_80387)
2054 error ("-mx87regparm used without 80387 enabled");
2055
2056 /* Accept -msseregparm only if at least SSE support is enabled. */
2057 if (TARGET_SSEREGPARM
2058 && ! TARGET_SSE)
2059 error ("-msseregparm used without SSE enabled");
2060
2061 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2062
2063 if (ix86_fpmath_string != 0)
2064 {
2065 if (! strcmp (ix86_fpmath_string, "387"))
2066 ix86_fpmath = FPMATH_387;
2067 else if (! strcmp (ix86_fpmath_string, "sse"))
2068 {
2069 if (!TARGET_SSE)
2070 {
2071 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2072 ix86_fpmath = FPMATH_387;
2073 }
2074 else
2075 ix86_fpmath = FPMATH_SSE;
2076 }
2077 else if (! strcmp (ix86_fpmath_string, "387,sse")
2078 || ! strcmp (ix86_fpmath_string, "sse,387"))
2079 {
2080 if (!TARGET_SSE)
2081 {
2082 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2083 ix86_fpmath = FPMATH_387;
2084 }
2085 else if (!TARGET_80387)
2086 {
2087 warning (0, "387 instruction set disabled, using SSE arithmetics");
2088 ix86_fpmath = FPMATH_SSE;
2089 }
2090 else
2091 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2092 }
2093 else
2094 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2095 }
2096
2097 /* If the i387 is disabled, then do not return values in it. */
2098 if (!TARGET_80387)
2099 target_flags &= ~MASK_FLOAT_RETURNS;
2100
2101 if ((x86_accumulate_outgoing_args & TUNEMASK)
2102 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2103 && !optimize_size)
2104 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2105
2106 /* ??? Unwind info is not correct around the CFG unless either a frame
2107 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2108 unwind info generation to be aware of the CFG and propagating states
2109 around edges. */
2110 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2111 || flag_exceptions || flag_non_call_exceptions)
2112 && flag_omit_frame_pointer
2113 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2114 {
2115 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2116 warning (0, "unwind tables currently require either a frame pointer "
2117 "or -maccumulate-outgoing-args for correctness");
2118 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2119 }
2120
2121 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2122 {
2123 char *p;
2124 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2125 p = strchr (internal_label_prefix, 'X');
2126 internal_label_prefix_len = p - internal_label_prefix;
2127 *p = '\0';
2128 }
2129
2130 /* When no scheduling description is available, disable the scheduler pass
2131 so it doesn't slow down compilation or make x87 code slower. */
2132 if (!TARGET_SCHEDULE)
2133 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2134
2135 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2136 set_param_value ("simultaneous-prefetches",
2137 ix86_cost->simultaneous_prefetches);
2138 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2139 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2140 }
2141 \f
2142 /* Switch to the appropriate section for output of DECL.
2143 DECL is either a `VAR_DECL' node or a constant of some sort.
2144 RELOC indicates whether forming the initial value of DECL requires
2145 link-time relocations. */
2146
2147 static section *
2148 x86_64_elf_select_section (tree decl, int reloc,
2149 unsigned HOST_WIDE_INT align)
2150 {
2151 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2152 && ix86_in_large_data_p (decl))
2153 {
2154 const char *sname = NULL;
2155 unsigned int flags = SECTION_WRITE;
2156 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2157 {
2158 case SECCAT_DATA:
2159 sname = ".ldata";
2160 break;
2161 case SECCAT_DATA_REL:
2162 sname = ".ldata.rel";
2163 break;
2164 case SECCAT_DATA_REL_LOCAL:
2165 sname = ".ldata.rel.local";
2166 break;
2167 case SECCAT_DATA_REL_RO:
2168 sname = ".ldata.rel.ro";
2169 break;
2170 case SECCAT_DATA_REL_RO_LOCAL:
2171 sname = ".ldata.rel.ro.local";
2172 break;
2173 case SECCAT_BSS:
2174 sname = ".lbss";
2175 flags |= SECTION_BSS;
2176 break;
2177 case SECCAT_RODATA:
2178 case SECCAT_RODATA_MERGE_STR:
2179 case SECCAT_RODATA_MERGE_STR_INIT:
2180 case SECCAT_RODATA_MERGE_CONST:
2181 sname = ".lrodata";
2182 flags = 0;
2183 break;
2184 case SECCAT_SRODATA:
2185 case SECCAT_SDATA:
2186 case SECCAT_SBSS:
2187 gcc_unreachable ();
2188 case SECCAT_TEXT:
2189 case SECCAT_TDATA:
2190 case SECCAT_TBSS:
2191 /* We don't split these for the medium model.  Place them into
2192 the default sections and hope for the best. */
2193 break;
2194 }
2195 if (sname)
2196 {
2197 /* We might get called with string constants, but get_named_section
2198 doesn't like them as they are not DECLs. Also, we need to set
2199 flags in that case. */
2200 if (!DECL_P (decl))
2201 return get_section (sname, flags, NULL);
2202 return get_named_section (decl, sname, reloc);
2203 }
2204 }
2205 return default_elf_select_section (decl, reloc, align);
2206 }
2207
2208 /* Build up a unique section name, expressed as a
2209 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2210 RELOC indicates whether the initial value of DECL requires
2211 link-time relocations. */
2212
2213 static void
2214 x86_64_elf_unique_section (tree decl, int reloc)
2215 {
2216 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2217 && ix86_in_large_data_p (decl))
2218 {
2219 const char *prefix = NULL;
2220 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2221 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2222
2223 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2224 {
2225 case SECCAT_DATA:
2226 case SECCAT_DATA_REL:
2227 case SECCAT_DATA_REL_LOCAL:
2228 case SECCAT_DATA_REL_RO:
2229 case SECCAT_DATA_REL_RO_LOCAL:
2230 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2231 break;
2232 case SECCAT_BSS:
2233 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2234 break;
2235 case SECCAT_RODATA:
2236 case SECCAT_RODATA_MERGE_STR:
2237 case SECCAT_RODATA_MERGE_STR_INIT:
2238 case SECCAT_RODATA_MERGE_CONST:
2239 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2240 break;
2241 case SECCAT_SRODATA:
2242 case SECCAT_SDATA:
2243 case SECCAT_SBSS:
2244 gcc_unreachable ();
2245 case SECCAT_TEXT:
2246 case SECCAT_TDATA:
2247 case SECCAT_TBSS:
2248 /* We don't split these for the medium model.  Place them into
2249 the default sections and hope for the best. */
2250 break;
2251 }
2252 if (prefix)
2253 {
2254 const char *name;
2255 size_t nlen, plen;
2256 char *string;
2257 plen = strlen (prefix);
2258
2259 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2260 name = targetm.strip_name_encoding (name);
2261 nlen = strlen (name);
2262
2263 string = alloca (nlen + plen + 1);
2264 memcpy (string, prefix, plen);
2265 memcpy (string + plen, name, nlen + 1);
2266
2267 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2268 return;
2269 }
2270 }
2271 default_unique_section (decl, reloc);
2272 }
2273
2274 #ifdef COMMON_ASM_OP
2275 /* This says how to output assembler code to declare an
2276 uninitialized external linkage data object.
2277
2278 For medium model x86-64 we need to use the .largecomm directive for
2279 large objects. */
2280 void
2281 x86_elf_aligned_common (FILE *file,
2282 const char *name, unsigned HOST_WIDE_INT size,
2283 int align)
2284 {
2285 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2286 && size > (unsigned int)ix86_section_threshold)
2287 fprintf (file, ".largecomm\t");
2288 else
2289 fprintf (file, "%s", COMMON_ASM_OP);
2290 assemble_name (file, name);
2291 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2292 size, align / BITS_PER_UNIT);
2293 }
2294
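/* Illustrative output (hedged; the symbol name and alignment are
   hypothetical): with -mcmodel=medium and an object larger than
   -mlarge-data-threshold, a common symbol such as

       char big_buf[1 << 20];

   would be emitted roughly as

       .largecomm	big_buf,1048576,32

   while smaller objects keep the ordinary COMMON_ASM_OP (.comm) directive.  */
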
2295 /* Utility function for targets to use in implementing
2296 ASM_OUTPUT_ALIGNED_BSS. */
2297
2298 void
2299 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2300 const char *name, unsigned HOST_WIDE_INT size,
2301 int align)
2302 {
2303 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2304 && size > (unsigned int)ix86_section_threshold)
2305 switch_to_section (get_named_section (decl, ".lbss", 0));
2306 else
2307 switch_to_section (bss_section);
2308 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2309 #ifdef ASM_DECLARE_OBJECT_NAME
2310 last_assemble_variable_decl = decl;
2311 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2312 #else
2313 /* Standard thing is just output label for the object. */
2314 ASM_OUTPUT_LABEL (file, name);
2315 #endif /* ASM_DECLARE_OBJECT_NAME */
2316 ASM_OUTPUT_SKIP (file, size ? size : 1);
2317 }
2318 #endif
2319 \f
2320 void
2321 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2322 {
2323 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2324 make the shortage of registers even worse. */
2325 #ifdef INSN_SCHEDULING
2326 if (level > 1)
2327 flag_schedule_insns = 0;
2328 #endif
2329
2330 if (TARGET_MACHO)
2331 /* The Darwin libraries never set errno, so we might as well
2332 avoid calling them when that's the only reason we would. */
2333 flag_errno_math = 0;
2334
2335 /* The default values of these switches depend on TARGET_64BIT,
2336 which is not known at this moment.  Mark these values with 2 and
2337 let the user override them.  If there is no command line option
2338 specifying them, we will set the defaults in override_options. */
2339 if (optimize >= 1)
2340 flag_omit_frame_pointer = 2;
2341 flag_pcc_struct_return = 2;
2342 flag_asynchronous_unwind_tables = 2;
2343 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2344 SUBTARGET_OPTIMIZATION_OPTIONS;
2345 #endif
2346 }
2347 \f
2348 /* Table of valid machine attributes. */
2349 const struct attribute_spec ix86_attribute_table[] =
2350 {
2351 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2352 /* Stdcall attribute says callee is responsible for popping arguments
2353 if they are not variable. */
2354 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2355 /* Fastcall attribute says callee is responsible for popping arguments
2356 if they are not variable. */
2357 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2358 /* Cdecl attribute says the callee is a normal C declaration */
2359 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2360 /* Regparm attribute specifies how many integer arguments are to be
2361 passed in registers. */
2362 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2363 /* X87regparm attribute says we are passing floating point arguments
2364 in 80387 registers. */
2365 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2366 /* Sseregparm attribute says we are using x86_64 calling conventions
2367 for FP arguments. */
2368 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2369 /* force_align_arg_pointer says this function realigns the stack at entry. */
2370 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2371 false, true, true, ix86_handle_cconv_attribute },
2372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2373 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2374 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2375 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2376 #endif
2377 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2378 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2379 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2380 SUBTARGET_ATTRIBUTE_TABLE,
2381 #endif
2382 { NULL, 0, 0, false, false, false, NULL }
2383 };
2384
2385 /* Decide whether we can make a sibling call to a function. DECL is the
2386 declaration of the function being targeted by the call and EXP is the
2387 CALL_EXPR representing the call. */
2388
2389 static bool
2390 ix86_function_ok_for_sibcall (tree decl, tree exp)
2391 {
2392 tree func;
2393 rtx a, b;
2394
2395 /* If we are generating position-independent code, we cannot sibcall
2396 optimize any indirect call, or a direct call to a global function,
2397 as the PLT requires %ebx be live. */
2398 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2399 return false;
2400
2401 if (decl)
2402 func = decl;
2403 else
2404 {
2405 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2406 if (POINTER_TYPE_P (func))
2407 func = TREE_TYPE (func);
2408 }
2409
2410 /* Check that the return value locations are the same.  For example,
2411 if we are returning floats on the 80387 register stack, we cannot
2412 make a sibcall from a function that doesn't return a float to a
2413 function that does, or, conversely, from a function that does return
2414 a float to a function that doesn't; the necessary stack adjustment
2415 would not be executed.  This is also the place where we notice
2416 differences in the return value ABI.  Note that it is OK for one
2417 of the functions to have a void return type as long as the return
2418 value of the other is passed in a register. */
2419 a = ix86_function_value (TREE_TYPE (exp), func, false);
2420 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2421 cfun->decl, false);
2422 if (STACK_REG_P (a) || STACK_REG_P (b))
2423 {
2424 if (!rtx_equal_p (a, b))
2425 return false;
2426 }
2427 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2428 ;
2429 else if (!rtx_equal_p (a, b))
2430 return false;
2431
2432 /* If this call is indirect, we'll need to be able to use a call-clobbered
2433 register for the address of the target function. Make sure that all
2434 such registers are not used for passing parameters. */
2435 if (!decl && !TARGET_64BIT)
2436 {
2437 tree type;
2438
2439 /* We're looking at the CALL_EXPR, we need the type of the function. */
2440 type = TREE_OPERAND (exp, 0); /* pointer expression */
2441 type = TREE_TYPE (type); /* pointer type */
2442 type = TREE_TYPE (type); /* function type */
2443
2444 if (ix86_function_regparm (type, NULL) >= 3)
2445 {
2446 /* ??? Need to count the actual number of registers to be used,
2447 not the possible number of registers. Fix later. */
2448 return false;
2449 }
2450 }
2451
2452 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2453 /* Dllimport'd functions are also called indirectly. */
2454 if (decl && DECL_DLLIMPORT_P (decl)
2455 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2456 return false;
2457 #endif
2458
2459 /* If we force-aligned the stack, then sibcalling would misalign the
2460 stack, which may break the called function. */
2461 if (cfun->machine->force_align_arg_pointer)
2462 return false;
2463
2464 /* Otherwise okay. That also includes certain types of indirect calls. */
2465 return true;
2466 }
2467
2468 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2469 and "sseregparm" calling convention attributes;
2470 arguments as in struct attribute_spec.handler. */
2471
2472 static tree
2473 ix86_handle_cconv_attribute (tree *node, tree name,
2474 tree args,
2475 int flags ATTRIBUTE_UNUSED,
2476 bool *no_add_attrs)
2477 {
2478 if (TREE_CODE (*node) != FUNCTION_TYPE
2479 && TREE_CODE (*node) != METHOD_TYPE
2480 && TREE_CODE (*node) != FIELD_DECL
2481 && TREE_CODE (*node) != TYPE_DECL)
2482 {
2483 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2484 IDENTIFIER_POINTER (name));
2485 *no_add_attrs = true;
2486 return NULL_TREE;
2487 }
2488
2489 /* Can combine regparm with all attributes but fastcall. */
2490 if (is_attribute_p ("regparm", name))
2491 {
2492 tree cst;
2493
2494 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2495 {
2496 error ("fastcall and regparm attributes are not compatible");
2497 }
2498
2499 cst = TREE_VALUE (args);
2500 if (TREE_CODE (cst) != INTEGER_CST)
2501 {
2502 warning (OPT_Wattributes,
2503 "%qs attribute requires an integer constant argument",
2504 IDENTIFIER_POINTER (name));
2505 *no_add_attrs = true;
2506 }
2507 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2508 {
2509 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2510 IDENTIFIER_POINTER (name), REGPARM_MAX);
2511 *no_add_attrs = true;
2512 }
2513
2514 if (!TARGET_64BIT
2515 && lookup_attribute (ix86_force_align_arg_pointer_string,
2516 TYPE_ATTRIBUTES (*node))
2517 && compare_tree_int (cst, REGPARM_MAX-1))
2518 {
2519 error ("%s functions limited to %d register parameters",
2520 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2521 }
2522
2523 return NULL_TREE;
2524 }
2525
2526 if (TARGET_64BIT)
2527 {
2528 warning (OPT_Wattributes, "%qs attribute ignored",
2529 IDENTIFIER_POINTER (name));
2530 *no_add_attrs = true;
2531 return NULL_TREE;
2532 }
2533
2534 /* Fastcall can only be combined with x87regparm and sseregparm;
2535 cdecl, stdcall and regparm are rejected below. */
2536 if (is_attribute_p ("fastcall", name))
2537 {
2538 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2539 {
2540 error ("fastcall and cdecl attributes are not compatible");
2541 }
2542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2543 {
2544 error ("fastcall and stdcall attributes are not compatible");
2545 }
2546 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2547 {
2548 error ("fastcall and regparm attributes are not compatible");
2549 }
2550 }
2551
2552 /* Can combine stdcall with regparm, x87regparm and sseregparm;
2553 cdecl and fastcall are rejected below. */
2554 else if (is_attribute_p ("stdcall", name))
2555 {
2556 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2557 {
2558 error ("stdcall and cdecl attributes are not compatible");
2559 }
2560 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2561 {
2562 error ("stdcall and fastcall attributes are not compatible");
2563 }
2564 }
2565
2566 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2567 else if (is_attribute_p ("cdecl", name))
2568 {
2569 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2570 {
2571 error ("stdcall and cdecl attributes are not compatible");
2572 }
2573 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2574 {
2575 error ("fastcall and cdecl attributes are not compatible");
2576 }
2577 }
2578
2579 /* Can combine x87regparm or sseregparm with all attributes. */
2580
2581 return NULL_TREE;
2582 }
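
/* Example of the diagnostics above (illustrative only; the declaration is
   hypothetical): on a 32-bit target,

       void __attribute__ ((stdcall, cdecl)) f (void);

   triggers "stdcall and cdecl attributes are not compatible", since the
   cdecl handler finds the already-attached stdcall attribute.  */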
2583
2584 /* Return 0 if the attributes for two types are incompatible, 1 if they
2585 are compatible, and 2 if they are nearly compatible (which causes a
2586 warning to be generated). */
2587
2588 static int
2589 ix86_comp_type_attributes (tree type1, tree type2)
2590 {
2591 /* Check for mismatch of non-default calling convention. */
2592 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2593
2594 if (TREE_CODE (type1) != FUNCTION_TYPE)
2595 return 1;
2596
2597 /* Check for mismatched fastcall/regparm types. */
2598 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2599 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2600 || (ix86_function_regparm (type1, NULL)
2601 != ix86_function_regparm (type2, NULL)))
2602 return 0;
2603
2604 /* Check for mismatched x87regparm types. */
2605 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1))
2606 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2)))
2607 return 0;
2608
2609 /* Check for mismatched sseregparm types. */
2610 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2611 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2612 return 0;
2613
2614 /* Check for a mismatched cdecl vs. stdcall calling convention. */
2615 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2616 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2617 return 0;
2618
2619 return 1;
2620 }
2621 \f
2622 /* Return the regparm value for a function with the indicated TYPE and DECL.
2623 DECL may be NULL when calling function indirectly
2624 or considering a libcall. */
2625
2626 static int
2627 ix86_function_regparm (tree type, tree decl)
2628 {
2629 tree attr;
2630 int regparm = ix86_regparm;
2631 bool user_convention = false;
2632
2633 if (!TARGET_64BIT)
2634 {
2635 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2636 if (attr)
2637 {
2638 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2639 user_convention = true;
2640 }
2641
2642 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2643 {
2644 regparm = 2;
2645 user_convention = true;
2646 }
2647
2648 /* Use register calling convention for local functions when possible. */
2649 if (!TARGET_64BIT && !user_convention && decl
2650 && flag_unit_at_a_time && !profile_flag)
2651 {
2652 struct cgraph_local_info *i = cgraph_local_info (decl);
2653 if (i && i->local)
2654 {
2655 int local_regparm, globals = 0, regno;
2656
2657 /* Make sure no regparm register is taken by a global register
2658 variable. */
2659 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2660 if (global_regs[local_regparm])
2661 break;
2662 /* We can't use regparm(3) for nested functions, as these use the
2663 static chain pointer in the third argument. */
2664 if (local_regparm == 3
2665 && decl_function_context (decl)
2666 && !DECL_NO_STATIC_CHAIN (decl))
2667 local_regparm = 2;
2668 /* If the function realigns its stack pointer, the
2669 prologue will clobber %ecx.  If we've already
2670 generated code for the callee, the callee's
2671 DECL_STRUCT_FUNCTION is gone, so we fall back to
2672 scanning the attributes for the self-realigning
2673 property. */
2674 if ((DECL_STRUCT_FUNCTION (decl)
2675 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2676 || (!DECL_STRUCT_FUNCTION (decl)
2677 && lookup_attribute (ix86_force_align_arg_pointer_string,
2678 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2679 local_regparm = 2;
2680 /* Each global register variable increases register pressure,
2681 so the more global reg vars there are, the less useful the
2682 regparm optimization is, unless requested explicitly by the user. */
2683 for (regno = 0; regno < 6; regno++)
2684 if (global_regs[regno])
2685 globals++;
2686 local_regparm
2687 = globals < local_regparm ? local_regparm - globals : 0;
2688
2689 if (local_regparm > regparm)
2690 regparm = local_regparm;
2691 }
2692 }
2693 }
2694 return regparm;
2695 }
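
/* For illustration (a hedged example; the function is hypothetical): with
   -O2 -funit-at-a-time on a 32-bit target,

       static int sum3 (int a, int b, int c) { return a + b + c; }

   whose address is never taken is marked local by cgraph and may be promoted
   to the equivalent of regparm(3), receiving A, B and C in %eax, %edx and
   %ecx; each global register variable in use reduces that count as computed
   above.  */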
2696
2697 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2698 arguments in x87 registers for a function with the indicated
2699 TYPE and DECL. DECL may be NULL when calling function indirectly
2700 or considering a libcall. For local functions, return 2.
2701 Otherwise return 0. */
2702
2703 static int
2704 ix86_function_x87regparm (tree type, tree decl)
2705 {
2706 /* Use x87 registers to pass floating point arguments if requested
2707 by the x87regparm attribute. */
2708 if (TARGET_X87REGPARM
2709 || (type
2710 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type))))
2711 {
2712 if (!TARGET_80387)
2713 {
2714 if (decl)
2715 error ("Calling %qD with attribute x87regparm without "
2716 "80387 enabled", decl);
2717 else
2718 error ("Calling %qT with attribute x87regparm without "
2719 "80387 enabled", type);
2720 return 0;
2721 }
2722
2723 return 1;
2724 }
2725
2726 /* For local functions, pass up to X87_REGPARM_MAX floating point
2727 arguments in x87 registers. */
2728 if (!TARGET_64BIT && decl
2729 && flag_unit_at_a_time && !profile_flag)
2730 {
2731 struct cgraph_local_info *i = cgraph_local_info (decl);
2732 if (i && i->local)
2733 return 2;
2734 }
2735
2736 return 0;
2737 }
2738
2739 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2740 DFmode (2) arguments in SSE registers for a function with the
2741 indicated TYPE and DECL. DECL may be NULL when calling function
2742 indirectly or considering a libcall. Otherwise return 0. */
2743
2744 static int
2745 ix86_function_sseregparm (tree type, tree decl)
2746 {
2747 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2748 by the sseregparm attribute. */
2749 if (TARGET_SSEREGPARM
2750 || (type
2751 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2752 {
2753 if (!TARGET_SSE)
2754 {
2755 if (decl)
2756 error ("Calling %qD with attribute sseregparm without "
2757 "SSE/SSE2 enabled", decl);
2758 else
2759 error ("Calling %qT with attribute sseregparm without "
2760 "SSE/SSE2 enabled", type);
2761 return 0;
2762 }
2763
2764 return 2;
2765 }
2766
2767 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2768 (and DFmode for SSE2) arguments in SSE registers,
2769 even for 32-bit targets. */
2770 if (!TARGET_64BIT && decl
2771 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2772 {
2773 struct cgraph_local_info *i = cgraph_local_info (decl);
2774 if (i && i->local)
2775 return TARGET_SSE2 ? 2 : 1;
2776 }
2777
2778 return 0;
2779 }
2780
2781 /* Return true if EAX is live at the start of the function. Used by
2782 ix86_expand_prologue to determine if we need special help before
2783 calling allocate_stack_worker. */
2784
2785 static bool
2786 ix86_eax_live_at_start_p (void)
2787 {
2788 /* Cheat. Don't bother working forward from ix86_function_regparm
2789 to the function type to whether an actual argument is located in
2790 eax. Instead just look at cfg info, which is still close enough
2791 to correct at this point. This gives false positives for broken
2792 functions that might use uninitialized data that happens to be
2793 allocated in eax, but who cares? */
2794 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2795 }
2796
2797 /* Value is the number of bytes of arguments automatically
2798 popped when returning from a subroutine call.
2799 FUNDECL is the declaration node of the function (as a tree),
2800 FUNTYPE is the data type of the function (as a tree),
2801 or for a library call it is an identifier node for the subroutine name.
2802 SIZE is the number of bytes of arguments passed on the stack.
2803
2804 On the 80386, the RTD insn may be used to pop them if the number
2805 of args is fixed, but if the number is variable then the caller
2806 must pop them all. RTD can't be used for library calls now
2807 because the library is compiled with the Unix compiler.
2808 Use of RTD is a selectable option, since it is incompatible with
2809 standard Unix calling sequences. If the option is not selected,
2810 the caller must always pop the args.
2811
2812 The attribute stdcall is equivalent to RTD on a per module basis. */
2813
2814 int
2815 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2816 {
2817 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2818
2819 /* Cdecl functions override -mrtd, and never pop the stack. */
2820 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2821
2822 /* Stdcall and fastcall functions will pop the stack if not
2823 variable args. */
2824 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2825 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2826 rtd = 1;
2827
2828 if (rtd
2829 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2830 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2831 == void_type_node)))
2832 return size;
2833 }
2834
2835 /* Lose any fake structure return argument if it is passed on the stack. */
2836 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2837 && !TARGET_64BIT
2838 && !KEEP_AGGREGATE_RETURN_POINTER)
2839 {
2840 int nregs = ix86_function_regparm (funtype, fundecl);
2841
2842 if (!nregs)
2843 return GET_MODE_SIZE (Pmode);
2844 }
2845
2846 return 0;
2847 }
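
/* Worked example (illustrative only; the declaration is hypothetical): for a
   32-bit function declared

       void __attribute__ ((stdcall)) f (int a, int b, int c);

   the fixed arguments occupy 12 bytes of stack, so this returns 12 and the
   callee pops them (ret $12); a cdecl or variadic function returns 0 and the
   caller pops the arguments instead.  */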
2848 \f
2849 /* Argument support functions. */
2850
2851 /* Return true when register may be used to pass function parameters. */
2852 bool
2853 ix86_function_arg_regno_p (int regno)
2854 {
2855 int i;
2856 if (!TARGET_64BIT)
2857 return (regno < REGPARM_MAX
2858 || (TARGET_80387 && FP_REGNO_P (regno)
2859 && (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
2860 || (TARGET_MMX && MMX_REGNO_P (regno)
2861 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2862 || (TARGET_SSE && SSE_REGNO_P (regno)
2863 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2864
2865 if (TARGET_SSE && SSE_REGNO_P (regno)
2866 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2867 return true;
2868 /* RAX is used as hidden argument to va_arg functions. */
2869 if (!regno)
2870 return true;
2871 for (i = 0; i < REGPARM_MAX; i++)
2872 if (regno == x86_64_int_parameter_registers[i])
2873 return true;
2874 return false;
2875 }
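
/* For reference (summarizing the checks above): in 64-bit mode the integer
   parameter registers are %rdi, %rsi, %rdx, %rcx, %r8 and %r9, SSE arguments
   use %xmm0-%xmm7, and %rax is the hidden register carrying the number of
   vector registers used by a varargs call.  */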
2876
2877 /* Return true if we do not know how to pass TYPE solely in registers. */
2878
2879 static bool
2880 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2881 {
2882 if (must_pass_in_stack_var_size_or_pad (mode, type))
2883 return true;
2884
2885 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2886 The layout_type routine is crafty and tries to trick us into passing
2887 currently unsupported vector types on the stack by using TImode. */
2888 return (!TARGET_64BIT && mode == TImode
2889 && type && TREE_CODE (type) != VECTOR_TYPE);
2890 }
2891
2892 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2893 for a call to a function whose data type is FNTYPE.
2894 For a library call, FNTYPE is 0. */
2895
2896 void
2897 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2898 tree fntype, /* tree ptr for function decl */
2899 rtx libname, /* SYMBOL_REF of library name or 0 */
2900 tree fndecl)
2901 {
2902 static CUMULATIVE_ARGS zero_cum;
2903 tree param, next_param;
2904
2905 if (TARGET_DEBUG_ARG)
2906 {
2907 fprintf (stderr, "\ninit_cumulative_args (");
2908 if (fntype)
2909 fprintf (stderr, "fntype code = %s, ret code = %s",
2910 tree_code_name[(int) TREE_CODE (fntype)],
2911 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2912 else
2913 fprintf (stderr, "no fntype");
2914
2915 if (libname)
2916 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2917 }
2918
2919 *cum = zero_cum;
2920
2921 /* Set up the number of registers to use for passing arguments. */
2922 cum->nregs = ix86_regparm;
2923 if (TARGET_80387)
2924 cum->x87_nregs = X87_REGPARM_MAX;
2925 if (TARGET_SSE)
2926 cum->sse_nregs = SSE_REGPARM_MAX;
2927 if (TARGET_MMX)
2928 cum->mmx_nregs = MMX_REGPARM_MAX;
2929 cum->warn_sse = true;
2930 cum->warn_mmx = true;
2931 cum->maybe_vaarg = false;
2932
2933 /* Use ecx and edx registers if function has fastcall attribute,
2934 else look for regparm information. */
2935 if (fntype && !TARGET_64BIT)
2936 {
2937 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2938 {
2939 cum->nregs = 2;
2940 cum->fastcall = 1;
2941 }
2942 else
2943 cum->nregs = ix86_function_regparm (fntype, fndecl);
2944 }
2945
2946 /* Set up the number of 80387 registers used for passing
2947 floating point arguments. Warn for mismatching ABI. */
2948 cum->float_in_x87 = ix86_function_x87regparm (fntype, fndecl);
2949
2950 /* Set up the number of SSE registers used for passing SFmode
2951 and DFmode arguments. Warn for mismatching ABI. */
2952 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2953
2954 /* Determine if this function has variable arguments. This is
2955 indicated by the last argument being 'void_type_node' if there
2956 are no variable arguments. If there are variable arguments, then
2957 we won't pass anything in registers in 32-bit mode. */
2958
2959 if (cum->nregs || cum->mmx_nregs
2960 || cum->x87_nregs || cum->sse_nregs)
2961 {
2962 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2963 param != 0; param = next_param)
2964 {
2965 next_param = TREE_CHAIN (param);
2966 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2967 {
2968 if (!TARGET_64BIT)
2969 {
2970 cum->nregs = 0;
2971 cum->x87_nregs = 0;
2972 cum->sse_nregs = 0;
2973 cum->mmx_nregs = 0;
2974 cum->warn_sse = 0;
2975 cum->warn_mmx = 0;
2976 cum->fastcall = 0;
2977 cum->float_in_x87 = 0;
2978 cum->float_in_sse = 0;
2979 }
2980 cum->maybe_vaarg = true;
2981 }
2982 }
2983 }
2984 if ((!fntype && !libname)
2985 || (fntype && !TYPE_ARG_TYPES (fntype)))
2986 cum->maybe_vaarg = true;
2987
2988 if (TARGET_DEBUG_ARG)
2989 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2990
2991 return;
2992 }
2993
2994 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2995 But in the case of vector types, it is some vector mode.
2996
2997 When we have only some of our vector isa extensions enabled, then there
2998 are some modes for which vector_mode_supported_p is false. For these
2999 modes, the generic vector support in gcc will choose some non-vector mode
3000 in order to implement the type. By computing the natural mode, we'll
3001 select the proper ABI location for the operand and not depend on whatever
3002 the middle-end decides to do with these vector types. */
3003
3004 static enum machine_mode
3005 type_natural_mode (tree type)
3006 {
3007 enum machine_mode mode = TYPE_MODE (type);
3008
3009 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3010 {
3011 HOST_WIDE_INT size = int_size_in_bytes (type);
3012 if ((size == 8 || size == 16)
3013 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3014 && TYPE_VECTOR_SUBPARTS (type) > 1)
3015 {
3016 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3017
3018 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3019 mode = MIN_MODE_VECTOR_FLOAT;
3020 else
3021 mode = MIN_MODE_VECTOR_INT;
3022
3023 /* Get the mode which has this inner mode and number of units. */
3024 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3025 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3026 && GET_MODE_INNER (mode) == innermode)
3027 return mode;
3028
3029 gcc_unreachable ();
3030 }
3031 }
3032
3033 return mode;
3034 }
3035
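/* Editorial note (not in the original sources): a minimal illustration of
   what type_natural_mode computes. Assume the user declares

       typedef int v2si __attribute__ ((vector_size (8)));

   With MMX disabled, vector_mode_supported_p is false for V2SImode, so the
   middle-end may not give the type a vector TYPE_MODE. type_natural_mode
   still walks the vector modes and returns V2SImode (2 units of SImode,
   8 bytes), so the ABI decision is made as if the natural vector mode
   existed. */
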
3036 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3037 this may not agree with the mode that the type system has chosen for the
3038 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3039 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3040
3041 static rtx
3042 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3043 unsigned int regno)
3044 {
3045 rtx tmp;
3046
3047 if (orig_mode != BLKmode)
3048 tmp = gen_rtx_REG (orig_mode, regno);
3049 else
3050 {
3051 tmp = gen_rtx_REG (mode, regno);
3052 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3053 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3054 }
3055
3056 return tmp;
3057 }
3058
3059 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
3060 of this code is to classify each 8 bytes of an incoming argument by register
3061 class and assign registers accordingly. */
3062
3063 /* Return the union class of CLASS1 and CLASS2.
3064 See the x86-64 PS ABI for details. */
3065
3066 static enum x86_64_reg_class
3067 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3068 {
3069 /* Rule #1: If both classes are equal, this is the resulting class. */
3070 if (class1 == class2)
3071 return class1;
3072
3073 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3074 the other class. */
3075 if (class1 == X86_64_NO_CLASS)
3076 return class2;
3077 if (class2 == X86_64_NO_CLASS)
3078 return class1;
3079
3080 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3081 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3082 return X86_64_MEMORY_CLASS;
3083
3084 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3085 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3086 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3087 return X86_64_INTEGERSI_CLASS;
3088 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3089 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3090 return X86_64_INTEGER_CLASS;
3091
3092 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3093 MEMORY is used. */
3094 if (class1 == X86_64_X87_CLASS
3095 || class1 == X86_64_X87UP_CLASS
3096 || class1 == X86_64_COMPLEX_X87_CLASS
3097 || class2 == X86_64_X87_CLASS
3098 || class2 == X86_64_X87UP_CLASS
3099 || class2 == X86_64_COMPLEX_X87_CLASS)
3100 return X86_64_MEMORY_CLASS;
3101
3102 /* Rule #6: Otherwise class SSE is used. */
3103 return X86_64_SSE_CLASS;
3104 }
3105
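/* Editorial example (not in the original sources): for the common case of

       struct s { int i; float f; };

   both fields fall into the same eightbyte. The int classifies as
   X86_64_INTEGERSI_CLASS and the float as X86_64_SSE_CLASS (its bit offset,
   32, is not a multiple of 64), and rule #4 above merges them into
   X86_64_INTEGER_CLASS, so the whole struct is passed in one GPR. */
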
3106 /* Classify the argument of type TYPE and mode MODE.
3107 CLASSES will be filled by the register class used to pass each word
3108 of the operand. The number of words is returned. In case the parameter
3109 should be passed in memory, 0 is returned. As a special case for zero
3110 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3111
3112 BIT_OFFSET is used internally for handling records; it specifies the
3113 offset of the argument in bits, modulo 256, to avoid overflow cases.
3114
3115 See the x86-64 PS ABI for details.
3116 */
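
/* Editorial worked example (not in the original sources): for

       struct s { int a; double b; };   (16 bytes, two eightbytes)

   classify_argument returns 2 with classes[0] = X86_64_INTEGERSI_CLASS
   (the int, starting at bit 0) and classes[1] = X86_64_SSEDF_CLASS (the
   double, starting at bit 64), so the struct travels in one integer
   register and one SSE register. */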
3117
3118 static int
3119 classify_argument (enum machine_mode mode, tree type,
3120 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3121 {
3122 HOST_WIDE_INT bytes =
3123 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3124 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3125
3126 /* Variable sized entities are always passed/returned in memory. */
3127 if (bytes < 0)
3128 return 0;
3129
3130 if (mode != VOIDmode
3131 && targetm.calls.must_pass_in_stack (mode, type))
3132 return 0;
3133
3134 if (type && AGGREGATE_TYPE_P (type))
3135 {
3136 int i;
3137 tree field;
3138 enum x86_64_reg_class subclasses[MAX_CLASSES];
3139
3140 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3141 if (bytes > 16)
3142 return 0;
3143
3144 for (i = 0; i < words; i++)
3145 classes[i] = X86_64_NO_CLASS;
3146
3147 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3148 signal the memory class, so handle this as a special case. */
3149 if (!words)
3150 {
3151 classes[0] = X86_64_NO_CLASS;
3152 return 1;
3153 }
3154
3155 /* Classify each field of record and merge classes. */
3156 switch (TREE_CODE (type))
3157 {
3158 case RECORD_TYPE:
3159 /* And now merge the fields of structure. */
3160 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3161 {
3162 if (TREE_CODE (field) == FIELD_DECL)
3163 {
3164 int num;
3165
3166 if (TREE_TYPE (field) == error_mark_node)
3167 continue;
3168
3169 /* Bitfields are always classified as integer. Handle them
3170 early, since later code would consider them to be
3171 misaligned integers. */
3172 if (DECL_BIT_FIELD (field))
3173 {
3174 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3175 i < ((int_bit_position (field) + (bit_offset % 64))
3176 + tree_low_cst (DECL_SIZE (field), 0)
3177 + 63) / 8 / 8; i++)
3178 classes[i] =
3179 merge_classes (X86_64_INTEGER_CLASS,
3180 classes[i]);
3181 }
3182 else
3183 {
3184 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3185 TREE_TYPE (field), subclasses,
3186 (int_bit_position (field)
3187 + bit_offset) % 256);
3188 if (!num)
3189 return 0;
3190 for (i = 0; i < num; i++)
3191 {
3192 int pos =
3193 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3194 classes[i + pos] =
3195 merge_classes (subclasses[i], classes[i + pos]);
3196 }
3197 }
3198 }
3199 }
3200 break;
3201
3202 case ARRAY_TYPE:
3203 /* Arrays are handled as small records. */
3204 {
3205 int num;
3206 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3207 TREE_TYPE (type), subclasses, bit_offset);
3208 if (!num)
3209 return 0;
3210
3211 /* The partial classes are now full classes. */
3212 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3213 subclasses[0] = X86_64_SSE_CLASS;
3214 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3215 subclasses[0] = X86_64_INTEGER_CLASS;
3216
3217 for (i = 0; i < words; i++)
3218 classes[i] = subclasses[i % num];
3219
3220 break;
3221 }
3222 case UNION_TYPE:
3223 case QUAL_UNION_TYPE:
3224 /* Unions are similar to RECORD_TYPE, but the offset is always 0.
3225 */
3226 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3227 {
3228 if (TREE_CODE (field) == FIELD_DECL)
3229 {
3230 int num;
3231
3232 if (TREE_TYPE (field) == error_mark_node)
3233 continue;
3234
3235 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3236 TREE_TYPE (field), subclasses,
3237 bit_offset);
3238 if (!num)
3239 return 0;
3240 for (i = 0; i < num; i++)
3241 classes[i] = merge_classes (subclasses[i], classes[i]);
3242 }
3243 }
3244 break;
3245
3246 default:
3247 gcc_unreachable ();
3248 }
3249
3250 /* Final merger cleanup. */
3251 for (i = 0; i < words; i++)
3252 {
3253 /* If one class is MEMORY, everything should be passed in
3254 memory. */
3255 if (classes[i] == X86_64_MEMORY_CLASS)
3256 return 0;
3257
3258 /* The X86_64_SSEUP_CLASS should be always preceded by
3259 X86_64_SSE_CLASS. */
3260 if (classes[i] == X86_64_SSEUP_CLASS
3261 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3262 classes[i] = X86_64_SSE_CLASS;
3263
3264 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3265 if (classes[i] == X86_64_X87UP_CLASS
3266 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3267 classes[i] = X86_64_SSE_CLASS;
3268 }
3269 return words;
3270 }
3271
3272 /* Compute the alignment needed. We align all types to their natural boundaries,
3273 with the exception of XFmode, which is aligned to 128 bits (XCmode to 256). */
3274 if (mode != VOIDmode && mode != BLKmode)
3275 {
3276 int mode_alignment = GET_MODE_BITSIZE (mode);
3277
3278 if (mode == XFmode)
3279 mode_alignment = 128;
3280 else if (mode == XCmode)
3281 mode_alignment = 256;
3282 if (COMPLEX_MODE_P (mode))
3283 mode_alignment /= 2;
3284 /* Misaligned fields are always returned in memory. */
3285 if (bit_offset % mode_alignment)
3286 return 0;
3287 }
3288
3289 /* For V1xx modes, just use the base mode. */
3290 if (VECTOR_MODE_P (mode)
3291 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3292 mode = GET_MODE_INNER (mode);
3293
3294 /* Classification of atomic types. */
3295 switch (mode)
3296 {
3297 case SDmode:
3298 case DDmode:
3299 classes[0] = X86_64_SSE_CLASS;
3300 return 1;
3301 case TDmode:
3302 classes[0] = X86_64_SSE_CLASS;
3303 classes[1] = X86_64_SSEUP_CLASS;
3304 return 2;
3305 case DImode:
3306 case SImode:
3307 case HImode:
3308 case QImode:
3309 case CSImode:
3310 case CHImode:
3311 case CQImode:
3312 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3313 classes[0] = X86_64_INTEGERSI_CLASS;
3314 else
3315 classes[0] = X86_64_INTEGER_CLASS;
3316 return 1;
3317 case CDImode:
3318 case TImode:
3319 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3320 return 2;
3321 case CTImode:
3322 return 0;
3323 case SFmode:
3324 if (!(bit_offset % 64))
3325 classes[0] = X86_64_SSESF_CLASS;
3326 else
3327 classes[0] = X86_64_SSE_CLASS;
3328 return 1;
3329 case DFmode:
3330 classes[0] = X86_64_SSEDF_CLASS;
3331 return 1;
3332 case XFmode:
3333 classes[0] = X86_64_X87_CLASS;
3334 classes[1] = X86_64_X87UP_CLASS;
3335 return 2;
3336 case TFmode:
3337 classes[0] = X86_64_SSE_CLASS;
3338 classes[1] = X86_64_SSEUP_CLASS;
3339 return 2;
3340 case SCmode:
3341 classes[0] = X86_64_SSE_CLASS;
3342 return 1;
3343 case DCmode:
3344 classes[0] = X86_64_SSEDF_CLASS;
3345 classes[1] = X86_64_SSEDF_CLASS;
3346 return 2;
3347 case XCmode:
3348 classes[0] = X86_64_COMPLEX_X87_CLASS;
3349 return 1;
3350 case TCmode:
3351 /* This mode is larger than 16 bytes. */
3352 return 0;
3353 case V4SFmode:
3354 case V4SImode:
3355 case V16QImode:
3356 case V8HImode:
3357 case V2DFmode:
3358 case V2DImode:
3359 classes[0] = X86_64_SSE_CLASS;
3360 classes[1] = X86_64_SSEUP_CLASS;
3361 return 2;
3362 case V2SFmode:
3363 case V2SImode:
3364 case V4HImode:
3365 case V8QImode:
3366 classes[0] = X86_64_SSE_CLASS;
3367 return 1;
3368 case BLKmode:
3369 case VOIDmode:
3370 return 0;
3371 default:
3372 gcc_assert (VECTOR_MODE_P (mode));
3373
3374 if (bytes > 16)
3375 return 0;
3376
3377 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3378
3379 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3380 classes[0] = X86_64_INTEGERSI_CLASS;
3381 else
3382 classes[0] = X86_64_INTEGER_CLASS;
3383 classes[1] = X86_64_INTEGER_CLASS;
3384 return 1 + (bytes > 8);
3385 }
3386 }
3387
3388 /* Examine the argument and set the number of registers required in each
3389 class. Return 0 iff the parameter should be passed in memory. */
3390 static int
3391 examine_argument (enum machine_mode mode, tree type, int in_return,
3392 int *int_nregs, int *sse_nregs)
3393 {
3394 enum x86_64_reg_class class[MAX_CLASSES];
3395 int n = classify_argument (mode, type, class, 0);
3396
3397 *int_nregs = 0;
3398 *sse_nregs = 0;
3399 if (!n)
3400 return 0;
3401 for (n--; n >= 0; n--)
3402 switch (class[n])
3403 {
3404 case X86_64_INTEGER_CLASS:
3405 case X86_64_INTEGERSI_CLASS:
3406 (*int_nregs)++;
3407 break;
3408 case X86_64_SSE_CLASS:
3409 case X86_64_SSESF_CLASS:
3410 case X86_64_SSEDF_CLASS:
3411 (*sse_nregs)++;
3412 break;
3413 case X86_64_NO_CLASS:
3414 case X86_64_SSEUP_CLASS:
3415 break;
3416 case X86_64_X87_CLASS:
3417 case X86_64_X87UP_CLASS:
3418 if (!in_return)
3419 return 0;
3420 break;
3421 case X86_64_COMPLEX_X87_CLASS:
3422 return in_return ? 2 : 0;
3423 case X86_64_MEMORY_CLASS:
3424 gcc_unreachable ();
3425 }
3426 return 1;
3427 }
3428
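/* Editorial example (not in the original sources): for the struct used in
   the classify_argument example above, examine_argument sets *int_nregs = 1
   and *sse_nregs = 1 and returns 1. For an XFmode long double argument it
   returns 0 when IN_RETURN is false (the X87 classes force memory passing),
   matching the psABI rule that long double arguments live on the stack. */
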
3429 /* Construct container for the argument used by GCC interface. See
3430 FUNCTION_ARG for the detailed description. */
3431
3432 static rtx
3433 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3434 tree type, int in_return, int nintregs, int nsseregs,
3435 const int *intreg, int sse_regno)
3436 {
3437 /* The following variables hold the static issued_error state. */
3438 static bool issued_sse_arg_error;
3439 static bool issued_sse_ret_error;
3440 static bool issued_x87_ret_error;
3441
3442 enum machine_mode tmpmode;
3443 int bytes =
3444 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3445 enum x86_64_reg_class class[MAX_CLASSES];
3446 int n;
3447 int i;
3448 int nexps = 0;
3449 int needed_sseregs, needed_intregs;
3450 rtx exp[MAX_CLASSES];
3451 rtx ret;
3452
3453 n = classify_argument (mode, type, class, 0);
3454 if (TARGET_DEBUG_ARG)
3455 {
3456 if (!n)
3457 fprintf (stderr, "Memory class\n");
3458 else
3459 {
3460 fprintf (stderr, "Classes:");
3461 for (i = 0; i < n; i++)
3462 {
3463 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3464 }
3465 fprintf (stderr, "\n");
3466 }
3467 }
3468 if (!n)
3469 return NULL;
3470 if (!examine_argument (mode, type, in_return, &needed_intregs,
3471 &needed_sseregs))
3472 return NULL;
3473 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3474 return NULL;
3475
3476 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3477 some less clueful developer tries to use floating-point anyway. */
3478 if (needed_sseregs && !TARGET_SSE)
3479 {
3480 if (in_return)
3481 {
3482 if (!issued_sse_ret_error)
3483 {
3484 error ("SSE register return with SSE disabled");
3485 issued_sse_ret_error = true;
3486 }
3487 }
3488 else if (!issued_sse_arg_error)
3489 {
3490 error ("SSE register argument with SSE disabled");
3491 issued_sse_arg_error = true;
3492 }
3493 return NULL;
3494 }
3495
3496 /* Likewise, error if the ABI requires us to return values in the
3497 x87 registers and the user specified -mno-80387. */
3498 if (!TARGET_80387 && in_return)
3499 for (i = 0; i < n; i++)
3500 if (class[i] == X86_64_X87_CLASS
3501 || class[i] == X86_64_X87UP_CLASS
3502 || class[i] == X86_64_COMPLEX_X87_CLASS)
3503 {
3504 if (!issued_x87_ret_error)
3505 {
3506 error ("x87 register return with x87 disabled");
3507 issued_x87_ret_error = true;
3508 }
3509 return NULL;
3510 }
3511
3512 /* First construct simple cases. Avoid SCmode, since we want to use
3513 a single register to pass this type. */
3514 if (n == 1 && mode != SCmode)
3515 switch (class[0])
3516 {
3517 case X86_64_INTEGER_CLASS:
3518 case X86_64_INTEGERSI_CLASS:
3519 return gen_rtx_REG (mode, intreg[0]);
3520 case X86_64_SSE_CLASS:
3521 case X86_64_SSESF_CLASS:
3522 case X86_64_SSEDF_CLASS:
3523 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3524 case X86_64_X87_CLASS:
3525 case X86_64_COMPLEX_X87_CLASS:
3526 return gen_rtx_REG (mode, FIRST_STACK_REG);
3527 case X86_64_NO_CLASS:
3528 /* Zero sized array, struct or class. */
3529 return NULL;
3530 default:
3531 gcc_unreachable ();
3532 }
3533 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3534 && mode != BLKmode)
3535 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3536 if (n == 2
3537 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3538 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3539 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3540 && class[1] == X86_64_INTEGER_CLASS
3541 && (mode == CDImode || mode == TImode || mode == TFmode)
3542 && intreg[0] + 1 == intreg[1])
3543 return gen_rtx_REG (mode, intreg[0]);
3544
3545 /* Otherwise figure out the entries of the PARALLEL. */
3546 for (i = 0; i < n; i++)
3547 {
3548 switch (class[i])
3549 {
3550 case X86_64_NO_CLASS:
3551 break;
3552 case X86_64_INTEGER_CLASS:
3553 case X86_64_INTEGERSI_CLASS:
3554 /* Merge TImodes on aligned occasions here too. */
3555 if (i * 8 + 8 > bytes)
3556 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3557 else if (class[i] == X86_64_INTEGERSI_CLASS)
3558 tmpmode = SImode;
3559 else
3560 tmpmode = DImode;
3561 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3562 if (tmpmode == BLKmode)
3563 tmpmode = DImode;
3564 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3565 gen_rtx_REG (tmpmode, *intreg),
3566 GEN_INT (i*8));
3567 intreg++;
3568 break;
3569 case X86_64_SSESF_CLASS:
3570 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3571 gen_rtx_REG (SFmode,
3572 SSE_REGNO (sse_regno)),
3573 GEN_INT (i*8));
3574 sse_regno++;
3575 break;
3576 case X86_64_SSEDF_CLASS:
3577 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3578 gen_rtx_REG (DFmode,
3579 SSE_REGNO (sse_regno)),
3580 GEN_INT (i*8));
3581 sse_regno++;
3582 break;
3583 case X86_64_SSE_CLASS:
3584 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3585 tmpmode = TImode;
3586 else
3587 tmpmode = DImode;
3588 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3589 gen_rtx_REG (tmpmode,
3590 SSE_REGNO (sse_regno)),
3591 GEN_INT (i*8));
3592 if (tmpmode == TImode)
3593 i++;
3594 sse_regno++;
3595 break;
3596 default:
3597 gcc_unreachable ();
3598 }
3599 }
3600
3601 /* Empty aligned struct, union or class. */
3602 if (nexps == 0)
3603 return NULL;
3604
3605 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3606 for (i = 0; i < nexps; i++)
3607 XVECEXP (ret, 0, i) = exp [i];
3608 return ret;
3609 }
3610
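/* Editorial sketch (not in the original sources) of the rtl construct_container
   produces for struct { int a; double b; } as an argument, assuming the first
   integer argument register and %xmm0 are still free:

       (parallel [(expr_list (reg:SI di)   (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the low eightbyte arrives in the first integer argument register and
   the high one in %xmm0. */
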
3611 /* Update the data in CUM to advance over an argument
3612 of mode MODE and data type TYPE.
3613 (TYPE is null for libcalls where that information may not be available.) */
3614
3615 void
3616 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3617 tree type, int named)
3618 {
3619 int bytes =
3620 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3621 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3622
3623 if (type)
3624 mode = type_natural_mode (type);
3625
3626 if (TARGET_DEBUG_ARG)
3627 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3628 "mode=%s, named=%d)\n\n",
3629 words, cum->words, cum->nregs, cum->sse_nregs,
3630 GET_MODE_NAME (mode), named);
3631
3632 if (TARGET_64BIT)
3633 {
3634 int int_nregs, sse_nregs;
3635 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3636 cum->words += words;
3637 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3638 {
3639 cum->nregs -= int_nregs;
3640 cum->sse_nregs -= sse_nregs;
3641 cum->regno += int_nregs;
3642 cum->sse_regno += sse_nregs;
3643 }
3644 else
3645 cum->words += words;
3646 }
3647 else
3648 {
3649 switch (mode)
3650 {
3651 default:
3652 break;
3653
3654 case BLKmode:
3655 if (bytes < 0)
3656 break;
3657 /* FALLTHRU */
3658
3659 case DImode:
3660 case SImode:
3661 case HImode:
3662 case QImode:
3663 cum->words += words;
3664 cum->nregs -= words;
3665 cum->regno += words;
3666
3667 if (cum->nregs <= 0)
3668 {
3669 cum->nregs = 0;
3670 cum->regno = 0;
3671 }
3672 break;
3673
3674 case SFmode:
3675 if (cum->float_in_sse > 0)
3676 goto skip_80387;
3677
3678 case DFmode:
3679 if (cum->float_in_sse > 1)
3680 goto skip_80387;
3681
3682 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3683 rounding takes place when values are passed in x87
3684 registers, pass DFmode and SFmode types to local functions
3685 only when flag_unsafe_math_optimizations is set. */
3686 if (!cum->float_in_x87
3687 || (cum->float_in_x87 == 2
3688 && !flag_unsafe_math_optimizations))
3689 break;
3690
3691 case XFmode:
3692 if (!cum->float_in_x87)
3693 break;
3694
3695 if (!type || !AGGREGATE_TYPE_P (type))
3696 {
3697 cum->x87_nregs -= 1;
3698 cum->x87_regno += 1;
3699 if (cum->x87_nregs <= 0)
3700 {
3701 cum->x87_nregs = 0;
3702 cum->x87_regno = 0;
3703 }
3704 }
3705 break;
3706
3707 skip_80387:
3708
3709 case TImode:
3710 case V16QImode:
3711 case V8HImode:
3712 case V4SImode:
3713 case V2DImode:
3714 case V4SFmode:
3715 case V2DFmode:
3716 if (!type || !AGGREGATE_TYPE_P (type))
3717 {
3718 cum->sse_nregs -= 1;
3719 cum->sse_regno += 1;
3720 if (cum->sse_nregs <= 0)
3721 {
3722 cum->sse_nregs = 0;
3723 cum->sse_regno = 0;
3724 }
3725 }
3726 break;
3727
3728 case V8QImode:
3729 case V4HImode:
3730 case V2SImode:
3731 case V2SFmode:
3732 if (!type || !AGGREGATE_TYPE_P (type))
3733 {
3734 cum->mmx_nregs -= 1;
3735 cum->mmx_regno += 1;
3736 if (cum->mmx_nregs <= 0)
3737 {
3738 cum->mmx_nregs = 0;
3739 cum->mmx_regno = 0;
3740 }
3741 }
3742 break;
3743 }
3744 }
3745 }
3746
3747 /* Define where to put the arguments to a function.
3748 Value is zero to push the argument on the stack,
3749 or a hard register in which to store the argument.
3750
3751 MODE is the argument's machine mode.
3752 TYPE is the data type of the argument (as a tree).
3753 This is null for libcalls where that information may
3754 not be available.
3755 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3756 the preceding args and about the function being called.
3757 NAMED is nonzero if this argument is a named parameter
3758 (otherwise it is an extra parameter matching an ellipsis). */
3759
3760 rtx
3761 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3762 tree type, int named)
3763 {
3764 enum machine_mode mode = orig_mode;
3765 rtx ret = NULL_RTX;
3766 int bytes =
3767 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3768 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3769 static bool warnedsse, warnedmmx;
3770
3771 /* To simplify the code below, represent vector types with a vector mode
3772 even if MMX/SSE are not active. */
3773 if (type && TREE_CODE (type) == VECTOR_TYPE)
3774 mode = type_natural_mode (type);
3775
3776 /* Handle a hidden AL argument containing the number of SSE registers used
3777 for varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
3778 avoid any AL settings. */
3779 if (mode == VOIDmode)
3780 {
3781 if (TARGET_64BIT)
3782 return GEN_INT (cum->maybe_vaarg
3783 ? (cum->sse_nregs < 0
3784 ? SSE_REGPARM_MAX
3785 : cum->sse_regno)
3786 : -1);
3787 else
3788 return constm1_rtx;
3789 }
3790 if (TARGET_64BIT)
3791 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3792 cum->sse_nregs,
3793 &x86_64_int_parameter_registers [cum->regno],
3794 cum->sse_regno);
3795 else
3796 switch (mode)
3797 {
3798 default:
3799 break;
3800
3801 case BLKmode:
3802 if (bytes < 0)
3803 break;
3804 /* FALLTHRU */
3805 case DImode:
3806 case SImode:
3807 case HImode:
3808 case QImode:
3809 if (words <= cum->nregs)
3810 {
3811 int regno = cum->regno;
3812
3813 /* Fastcall allocates the first two DWORD (SImode) or
3814 smaller arguments to ECX and EDX. */
3815 if (cum->fastcall)
3816 {
3817 if (mode == BLKmode || mode == DImode)
3818 break;
3819
3820 /* ECX not EAX is the first allocated register. */
3821 if (regno == 0)
3822 regno = 2;
3823 }
3824 ret = gen_rtx_REG (mode, regno);
3825 }
3826 break;
3827
3828 case SFmode:
3829 if (cum->float_in_sse > 0)
3830 goto skip_80387;
3831
3832 case DFmode:
3833 if (cum->float_in_sse > 1)
3834 goto skip_80387;
3835
3836 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3837 rounding takes place when values are passed in x87
3838 registers, pass DFmode and SFmode types to local functions
3839 only when flag_unsafe_math_optimizations is set. */
3840 if (!cum->float_in_x87
3841 || (cum->float_in_x87 == 2
3842 && !flag_unsafe_math_optimizations))
3843 break;
3844
3845 case XFmode:
3846 if (!cum->float_in_x87)
3847 break;
3848
3849 if (!type || !AGGREGATE_TYPE_P (type))
3850 if (cum->x87_nregs)
3851 ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
3852 break;
3853
3854 skip_80387:
3855
3856 case TImode:
3857 case V16QImode:
3858 case V8HImode:
3859 case V4SImode:
3860 case V2DImode:
3861 case V4SFmode:
3862 case V2DFmode:
3863 if (!type || !AGGREGATE_TYPE_P (type))
3864 {
3865 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3866 {
3867 warnedsse = true;
3868 warning (0, "SSE vector argument without SSE enabled "
3869 "changes the ABI");
3870 }
3871 if (cum->sse_nregs)
3872 ret = gen_reg_or_parallel (mode, orig_mode,
3873 cum->sse_regno + FIRST_SSE_REG);
3874 }
3875 break;
3876 case V8QImode:
3877 case V4HImode:
3878 case V2SImode:
3879 case V2SFmode:
3880 if (!type || !AGGREGATE_TYPE_P (type))
3881 {
3882 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3883 {
3884 warnedmmx = true;
3885 warning (0, "MMX vector argument without MMX enabled "
3886 "changes the ABI");
3887 }
3888 if (cum->mmx_nregs)
3889 ret = gen_reg_or_parallel (mode, orig_mode,
3890 cum->mmx_regno + FIRST_MMX_REG);
3891 }
3892 break;
3893 }
3894
3895 if (TARGET_DEBUG_ARG)
3896 {
3897 fprintf (stderr,
3898 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3899 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3900
3901 if (ret)
3902 print_simple_rtl (stderr, ret);
3903 else
3904 fprintf (stderr, ", stack");
3905
3906 fprintf (stderr, " )\n");
3907 }
3908
3909 return ret;
3910 }
3911
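/* Editorial example (not in the original sources): on IA-32 with
   regparm(3), a named int argument with cum->regno == 0 comes back as
   (reg:SI 0), i.e. %eax; under the fastcall convention the same argument is
   redirected to hard register 2, i.e. %ecx, as handled above. On x86-64 the
   work is done by construct_container instead. */
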
3912 /* A C expression that indicates when an argument must be passed by
3913 reference. If nonzero for an argument, a copy of that argument is
3914 made in memory and a pointer to the argument is passed instead of
3915 the argument itself. The pointer is passed in whatever way is
3916 appropriate for passing a pointer to that type. */
3917
3918 static bool
3919 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3920 enum machine_mode mode ATTRIBUTE_UNUSED,
3921 tree type, bool named ATTRIBUTE_UNUSED)
3922 {
3923 if (!TARGET_64BIT)
3924 return 0;
3925
3926 if (type && int_size_in_bytes (type) == -1)
3927 {
3928 if (TARGET_DEBUG_ARG)
3929 fprintf (stderr, "function_arg_pass_by_reference\n");
3930 return 1;
3931 }
3932
3933 return 0;
3934 }
3935
3936 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3937 passing ABI. Only called if TARGET_SSE. */
3938 static bool
3939 contains_128bit_aligned_vector_p (tree type)
3940 {
3941 enum machine_mode mode = TYPE_MODE (type);
3942 if (SSE_REG_MODE_P (mode)
3943 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3944 return true;
3945 if (TYPE_ALIGN (type) < 128)
3946 return false;
3947
3948 if (AGGREGATE_TYPE_P (type))
3949 {
3950 /* Walk the aggregates recursively. */
3951 switch (TREE_CODE (type))
3952 {
3953 case RECORD_TYPE:
3954 case UNION_TYPE:
3955 case QUAL_UNION_TYPE:
3956 {
3957 tree field;
3958
3959 /* Walk all the structure fields. */
3960 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3961 {
3962 if (TREE_CODE (field) == FIELD_DECL
3963 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3964 return true;
3965 }
3966 break;
3967 }
3968
3969 case ARRAY_TYPE:
3970 /* Just for use if some language passes arrays by value. */
3971 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3972 return true;
3973 break;
3974
3975 default:
3976 gcc_unreachable ();
3977 }
3978 }
3979 return false;
3980 }
3981
3982 /* Gives the alignment boundary, in bits, of an argument with the
3983 specified mode and type. */
3984
3985 int
3986 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3987 {
3988 int align;
3989 if (type)
3990 align = TYPE_ALIGN (type);
3991 else
3992 align = GET_MODE_ALIGNMENT (mode);
3993 if (align < PARM_BOUNDARY)
3994 align = PARM_BOUNDARY;
3995 if (!TARGET_64BIT)
3996 {
3997 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3998 make an exception for SSE modes since these require 128bit
3999 alignment.
4000
4001 The handling here differs from field_alignment. ICC aligns MMX
4002 arguments to 4 byte boundaries, while structure fields are aligned
4003 to 8 byte boundaries. */
4004 if (!TARGET_SSE)
4005 align = PARM_BOUNDARY;
4006 else if (!type)
4007 {
4008 if (!SSE_REG_MODE_P (mode))
4009 align = PARM_BOUNDARY;
4010 }
4011 else
4012 {
4013 if (!contains_128bit_aligned_vector_p (type))
4014 align = PARM_BOUNDARY;
4015 }
4016 }
4017 if (align > 128)
4018 align = 128;
4019 return align;
4020 }
4021
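/* Editorial example (not in the original sources): on IA-32 with SSE
   enabled, a double argument gets PARM_BOUNDARY (32-bit) alignment because
   it contains no 128-bit aligned vector, while an __m128 argument keeps its
   128-bit alignment. On x86-64 the result is the natural alignment (at
   least PARM_BOUNDARY), capped at 128. */
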
4022 /* Return true if N is a possible register number of function value. */
4023 bool
4024 ix86_function_value_regno_p (int regno)
4025 {
4026 if (regno == 0
4027 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4028 || (regno == FIRST_SSE_REG && TARGET_SSE))
4029 return true;
4030
4031 if (!TARGET_64BIT
4032 && (regno == FIRST_MMX_REG && TARGET_MMX))
4033 return true;
4034
4035 return false;
4036 }
4037
4038 /* Define how to find the value returned by a function.
4039 VALTYPE is the data type of the value (as a tree).
4040 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4041 otherwise, FUNC is 0. */
4042 rtx
4043 ix86_function_value (tree valtype, tree fntype_or_decl,
4044 bool outgoing ATTRIBUTE_UNUSED)
4045 {
4046 enum machine_mode natmode = type_natural_mode (valtype);
4047
4048 if (TARGET_64BIT)
4049 {
4050 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4051 1, REGPARM_MAX, SSE_REGPARM_MAX,
4052 x86_64_int_return_registers, 0);
4053 /* For zero-sized structures, construct_container returns NULL, but we need
4054 to keep the rest of the compiler happy by returning a meaningful value. */
4055 if (!ret)
4056 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4057 return ret;
4058 }
4059 else
4060 {
4061 tree fn = NULL_TREE, fntype;
4062 if (fntype_or_decl
4063 && DECL_P (fntype_or_decl))
4064 fn = fntype_or_decl;
4065 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4066 return gen_rtx_REG (TYPE_MODE (valtype),
4067 ix86_value_regno (natmode, fn, fntype));
4068 }
4069 }
4070
4071 /* Return true iff type is returned in memory. */
4072 int
4073 ix86_return_in_memory (tree type)
4074 {
4075 int needed_intregs, needed_sseregs, size;
4076 enum machine_mode mode = type_natural_mode (type);
4077
4078 if (TARGET_64BIT)
4079 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4080
4081 if (mode == BLKmode)
4082 return 1;
4083
4084 size = int_size_in_bytes (type);
4085
4086 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4087 return 0;
4088
4089 if (VECTOR_MODE_P (mode) || mode == TImode)
4090 {
4091 /* User-created vectors small enough to fit in EAX. */
4092 if (size < 8)
4093 return 0;
4094
4095 /* MMX/3dNow values are returned in MM0,
4096 except when it doesn't exist. */
4097 if (size == 8)
4098 return (TARGET_MMX ? 0 : 1);
4099
4100 /* SSE values are returned in XMM0, except when it doesn't exist. */
4101 if (size == 16)
4102 return (TARGET_SSE ? 0 : 1);
4103 }
4104
4105 if (mode == XFmode)
4106 return 0;
4107
4108 if (mode == TDmode)
4109 return 1;
4110
4111 if (size > 12)
4112 return 1;
4113 return 0;
4114 }
4115
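/* Editorial example (not in the original sources): on IA-32 an __m128
   value is returned in %xmm0 when SSE is enabled and in memory otherwise,
   which is exactly the ABI-changing situation ix86_struct_value_rtx warns
   about below. */
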
4116 /* When returning SSE vector types, we have a choice of either
4117 (1) being abi incompatible with a -march switch, or
4118 (2) generating an error.
4119 Given no good solution, I think the safest thing is one warning.
4120 The user won't be able to use -Werror, but....
4121
4122 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4123 called in response to actually generating a caller or callee that
4124 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4125 via aggregate_value_p for general type probing from tree-ssa. */
4126
4127 static rtx
4128 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4129 {
4130 static bool warnedsse, warnedmmx;
4131
4132 if (type)
4133 {
4134 /* Look at the return type of the function, not the function type. */
4135 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4136
4137 if (!TARGET_SSE && !warnedsse)
4138 {
4139 if (mode == TImode
4140 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4141 {
4142 warnedsse = true;
4143 warning (0, "SSE vector return without SSE enabled "
4144 "changes the ABI");
4145 }
4146 }
4147
4148 if (!TARGET_MMX && !warnedmmx)
4149 {
4150 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4151 {
4152 warnedmmx = true;
4153 warning (0, "MMX vector return without MMX enabled "
4154 "changes the ABI");
4155 }
4156 }
4157 }
4158
4159 return NULL;
4160 }
4161
4162 /* Define how to find the value returned by a library function
4163 assuming the value has mode MODE. */
4164 rtx
4165 ix86_libcall_value (enum machine_mode mode)
4166 {
4167 if (TARGET_64BIT)
4168 {
4169 switch (mode)
4170 {
4171 case SFmode:
4172 case SCmode:
4173 case DFmode:
4174 case DCmode:
4175 case TFmode:
4176 case SDmode:
4177 case DDmode:
4178 case TDmode:
4179 return gen_rtx_REG (mode, FIRST_SSE_REG);
4180 case XFmode:
4181 case XCmode:
4182 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4183 case TCmode:
4184 return NULL;
4185 default:
4186 return gen_rtx_REG (mode, 0);
4187 }
4188 }
4189 else
4190 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4191 }
4192
4193 /* Given a mode, return the register to use for a return value. */
4194
4195 static int
4196 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4197 {
4198 gcc_assert (!TARGET_64BIT);
4199
4200 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4201 we normally prevent this case when mmx is not available. However
4202 some ABIs may require the result to be returned like DImode. */
4203 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4204 return TARGET_MMX ? FIRST_MMX_REG : 0;
4205
4206 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4207 we prevent this case when sse is not available. However some ABIs
4208 may require the result to be returned like integer TImode. */
4209 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4210 return TARGET_SSE ? FIRST_SSE_REG : 0;
4211
4212 /* Decimal floating point values can go in %eax, unlike other float modes. */
4213 if (DECIMAL_FLOAT_MODE_P (mode))
4214 return 0;
4215
4216 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4217 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4218 return 0;
4219
4220 /* Floating point return values in %st(0), except for local functions when
4221 SSE math is enabled or for functions with sseregparm attribute. */
4222 if ((func || fntype)
4223 && (mode == SFmode || mode == DFmode))
4224 {
4225 int sse_level = ix86_function_sseregparm (fntype, func);
4226 if ((sse_level >= 1 && mode == SFmode)
4227 || (sse_level == 2 && mode == DFmode))
4228 return FIRST_SSE_REG;
4229 }
4230
4231 return FIRST_FLOAT_REG;
4232 }
4233 \f
4234 /* Create the va_list data type. */
4235
4236 static tree
4237 ix86_build_builtin_va_list (void)
4238 {
4239 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4240
4241 /* For i386 we use plain pointer to argument area. */
4242 if (!TARGET_64BIT)
4243 return build_pointer_type (char_type_node);
4244
4245 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4246 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4247
4248 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4249 unsigned_type_node);
4250 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4251 unsigned_type_node);
4252 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4253 ptr_type_node);
4254 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4255 ptr_type_node);
4256
4257 va_list_gpr_counter_field = f_gpr;
4258 va_list_fpr_counter_field = f_fpr;
4259
4260 DECL_FIELD_CONTEXT (f_gpr) = record;
4261 DECL_FIELD_CONTEXT (f_fpr) = record;
4262 DECL_FIELD_CONTEXT (f_ovf) = record;
4263 DECL_FIELD_CONTEXT (f_sav) = record;
4264
4265 TREE_CHAIN (record) = type_decl;
4266 TYPE_NAME (record) = type_decl;
4267 TYPE_FIELDS (record) = f_gpr;
4268 TREE_CHAIN (f_gpr) = f_fpr;
4269 TREE_CHAIN (f_fpr) = f_ovf;
4270 TREE_CHAIN (f_ovf) = f_sav;
4271
4272 layout_type (record);
4273
4274 /* The correct type is an array type of one element. */
4275 return build_array_type (record, build_index_type (size_zero_node));
4276 }
4277
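/* Editorial note (not in the original sources): the record built above
   corresponds to the familiar x86-64 va_list layout, roughly

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   ix86_va_start and ix86_gimplify_va_arg below fill in and consume these
   four fields. */
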
4278 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4279
4280 static void
4281 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4282 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4283 int no_rtl)
4284 {
4285 CUMULATIVE_ARGS next_cum;
4286 rtx save_area = NULL_RTX, mem;
4287 rtx label;
4288 rtx label_ref;
4289 rtx tmp_reg;
4290 rtx nsse_reg;
4291 int set;
4292 tree fntype;
4293 int stdarg_p;
4294 int i;
4295
4296 if (!TARGET_64BIT)
4297 return;
4298
4299 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4300 return;
4301
4302 /* Indicate to allocate space on the stack for varargs save area. */
4303 ix86_save_varrargs_registers = 1;
4304
4305 cfun->stack_alignment_needed = 128;
4306
4307 fntype = TREE_TYPE (current_function_decl);
4308 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4309 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4310 != void_type_node));
4311
4312 /* For varargs, we do not want to skip the dummy va_dcl argument.
4313 For stdargs, we do want to skip the last named argument. */
4314 next_cum = *cum;
4315 if (stdarg_p)
4316 function_arg_advance (&next_cum, mode, type, 1);
4317
4318 if (!no_rtl)
4319 save_area = frame_pointer_rtx;
4320
4321 set = get_varargs_alias_set ();
4322
4323 for (i = next_cum.regno;
4324 i < ix86_regparm
4325 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4326 i++)
4327 {
4328 mem = gen_rtx_MEM (Pmode,
4329 plus_constant (save_area, i * UNITS_PER_WORD));
4330 MEM_NOTRAP_P (mem) = 1;
4331 set_mem_alias_set (mem, set);
4332 emit_move_insn (mem, gen_rtx_REG (Pmode,
4333 x86_64_int_parameter_registers[i]));
4334 }
4335
4336 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4337 {
4338 /* Now emit code to save SSE registers. The AX parameter contains the number
4339 of SSE parameter registers used to call this function. We use the
4340 sse_prologue_save insn template, which produces a computed jump across the
4341 SSE saves. We need some preparation work to get this working. */
4342
4343 label = gen_label_rtx ();
4344 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4345
4346 /* Compute address to jump to :
4347 label - 5*eax + nnamed_sse_arguments*5 */
4348 tmp_reg = gen_reg_rtx (Pmode);
4349 nsse_reg = gen_reg_rtx (Pmode);
4350 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4351 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4352 gen_rtx_MULT (Pmode, nsse_reg,
4353 GEN_INT (4))));
4354 if (next_cum.sse_regno)
4355 emit_move_insn
4356 (nsse_reg,
4357 gen_rtx_CONST (DImode,
4358 gen_rtx_PLUS (DImode,
4359 label_ref,
4360 GEN_INT (next_cum.sse_regno * 4))));
4361 else
4362 emit_move_insn (nsse_reg, label_ref);
4363 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4364
4365 /* Compute the address of the memory block we save into. We always use a
4366 pointer pointing 127 bytes after the first byte to store; this is needed to
4367 keep the instruction size limited to 4 bytes. */
4368 tmp_reg = gen_reg_rtx (Pmode);
4369 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4370 plus_constant (save_area,
4371 8 * REGPARM_MAX + 127)));
4372 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4373 MEM_NOTRAP_P (mem) = 1;
4374 set_mem_alias_set (mem, set);
4375 set_mem_align (mem, BITS_PER_WORD);
4376
4377 /* And finally do the dirty job! */
4378 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4379 GEN_INT (next_cum.sse_regno), label));
4380 }
4381
4382 }
4383
4384 /* Implement va_start. */
4385
4386 void
4387 ix86_va_start (tree valist, rtx nextarg)
4388 {
4389 HOST_WIDE_INT words, n_gpr, n_fpr;
4390 tree f_gpr, f_fpr, f_ovf, f_sav;
4391 tree gpr, fpr, ovf, sav, t;
4392 tree type;
4393
4394 /* Only 64bit target needs something special. */
4395 if (!TARGET_64BIT)
4396 {
4397 std_expand_builtin_va_start (valist, nextarg);
4398 return;
4399 }
4400
4401 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4402 f_fpr = TREE_CHAIN (f_gpr);
4403 f_ovf = TREE_CHAIN (f_fpr);
4404 f_sav = TREE_CHAIN (f_ovf);
4405
4406 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4407 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4408 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4410 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4411
4412 /* Count number of gp and fp argument registers used. */
4413 words = current_function_args_info.words;
4414 n_gpr = current_function_args_info.regno;
4415 n_fpr = current_function_args_info.sse_regno;
4416
4417 if (TARGET_DEBUG_ARG)
4418 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4419 (int) words, (int) n_gpr, (int) n_fpr);
4420
4421 if (cfun->va_list_gpr_size)
4422 {
4423 type = TREE_TYPE (gpr);
4424 t = build2 (MODIFY_EXPR, type, gpr,
4425 build_int_cst (type, n_gpr * 8));
4426 TREE_SIDE_EFFECTS (t) = 1;
4427 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4428 }
4429
4430 if (cfun->va_list_fpr_size)
4431 {
4432 type = TREE_TYPE (fpr);
4433 t = build2 (MODIFY_EXPR, type, fpr,
4434 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4435 TREE_SIDE_EFFECTS (t) = 1;
4436 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4437 }
4438
4439 /* Find the overflow area. */
4440 type = TREE_TYPE (ovf);
4441 t = make_tree (type, virtual_incoming_args_rtx);
4442 if (words != 0)
4443 t = build2 (PLUS_EXPR, type, t,
4444 build_int_cst (type, words * UNITS_PER_WORD));
4445 t = build2 (MODIFY_EXPR, type, ovf, t);
4446 TREE_SIDE_EFFECTS (t) = 1;
4447 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4448
4449 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4450 {
4451 /* Find the register save area.
4452 The prologue of the function saves it right above the stack frame. */
4453 type = TREE_TYPE (sav);
4454 t = make_tree (type, frame_pointer_rtx);
4455 t = build2 (MODIFY_EXPR, type, sav, t);
4456 TREE_SIDE_EFFECTS (t) = 1;
4457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4458 }
4459 }
4460
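/* Editorial note (not in the original sources): after ix86_va_start,
   gp_offset is n_gpr * 8 and fp_offset is 48 + n_fpr * 16 (with
   REGPARM_MAX == 6, the integer registers occupy the first 48 bytes of the
   register save area and each SSE register slot is 16 bytes), matching the
   layout emitted by ix86_setup_incoming_varargs. */
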
4461 /* Implement va_arg. */
4462
4463 tree
4464 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4465 {
4466 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4467 tree f_gpr, f_fpr, f_ovf, f_sav;
4468 tree gpr, fpr, ovf, sav, t;
4469 int size, rsize;
4470 tree lab_false, lab_over = NULL_TREE;
4471 tree addr, t2;
4472 rtx container;
4473 int indirect_p = 0;
4474 tree ptrtype;
4475 enum machine_mode nat_mode;
4476
4477 /* Only 64bit target needs something special. */
4478 if (!TARGET_64BIT)
4479 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4480
4481 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4482 f_fpr = TREE_CHAIN (f_gpr);
4483 f_ovf = TREE_CHAIN (f_fpr);
4484 f_sav = TREE_CHAIN (f_ovf);
4485
4486 valist = build_va_arg_indirect_ref (valist);
4487 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4488 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4489 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4490 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4491
4492 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4493 if (indirect_p)
4494 type = build_pointer_type (type);
4495 size = int_size_in_bytes (type);
4496 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4497
4498 nat_mode = type_natural_mode (type);
4499 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4500 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4501
4502 /* Pull the value out of the saved registers. */
4503
4504 addr = create_tmp_var (ptr_type_node, "addr");
4505 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4506
4507 if (container)
4508 {
4509 int needed_intregs, needed_sseregs;
4510 bool need_temp;
4511 tree int_addr, sse_addr;
4512
4513 lab_false = create_artificial_label ();
4514 lab_over = create_artificial_label ();
4515
4516 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4517
4518 need_temp = (!REG_P (container)
4519 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4520 || TYPE_ALIGN (type) > 128));
4521
4522 /* In case we are passing a structure, verify that it is a consecutive block
4523 in the register save area. If not, we need to do moves. */
4524 if (!need_temp && !REG_P (container))
4525 {
4526 /* Verify that all registers are strictly consecutive */
4527 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4528 {
4529 int i;
4530
4531 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4532 {
4533 rtx slot = XVECEXP (container, 0, i);
4534 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4535 || INTVAL (XEXP (slot, 1)) != i * 16)
4536 need_temp = 1;
4537 }
4538 }
4539 else
4540 {
4541 int i;
4542
4543 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4544 {
4545 rtx slot = XVECEXP (container, 0, i);
4546 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4547 || INTVAL (XEXP (slot, 1)) != i * 8)
4548 need_temp = 1;
4549 }
4550 }
4551 }
4552 if (!need_temp)
4553 {
4554 int_addr = addr;
4555 sse_addr = addr;
4556 }
4557 else
4558 {
4559 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4560 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4561 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4562 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4563 }
4564
4565 /* First ensure that we fit completely in registers. */
4566 if (needed_intregs)
4567 {
4568 t = build_int_cst (TREE_TYPE (gpr),
4569 (REGPARM_MAX - needed_intregs + 1) * 8);
4570 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4571 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4572 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4573 gimplify_and_add (t, pre_p);
4574 }
4575 if (needed_sseregs)
4576 {
4577 t = build_int_cst (TREE_TYPE (fpr),
4578 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4579 + REGPARM_MAX * 8);
4580 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4581 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4582 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4583 gimplify_and_add (t, pre_p);
4584 }
4585
4586 /* Compute index to start of area used for integer regs. */
4587 if (needed_intregs)
4588 {
4589 /* int_addr = gpr + sav; */
4590 t = fold_convert (ptr_type_node, gpr);
4591 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4592 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4593 gimplify_and_add (t, pre_p);
4594 }
4595 if (needed_sseregs)
4596 {
4597 /* sse_addr = fpr + sav; */
4598 t = fold_convert (ptr_type_node, fpr);
4599 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4600 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4601 gimplify_and_add (t, pre_p);
4602 }
4603 if (need_temp)
4604 {
4605 int i;
4606 tree temp = create_tmp_var (type, "va_arg_tmp");
4607
4608 /* addr = &temp; */
4609 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4610 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4611 gimplify_and_add (t, pre_p);
4612
4613 for (i = 0; i < XVECLEN (container, 0); i++)
4614 {
4615 rtx slot = XVECEXP (container, 0, i);
4616 rtx reg = XEXP (slot, 0);
4617 enum machine_mode mode = GET_MODE (reg);
4618 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4619 tree addr_type = build_pointer_type (piece_type);
4620 tree src_addr, src;
4621 int src_offset;
4622 tree dest_addr, dest;
4623
4624 if (SSE_REGNO_P (REGNO (reg)))
4625 {
4626 src_addr = sse_addr;
4627 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4628 }
4629 else
4630 {
4631 src_addr = int_addr;
4632 src_offset = REGNO (reg) * 8;
4633 }
4634 src_addr = fold_convert (addr_type, src_addr);
4635 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4636 size_int (src_offset)));
4637 src = build_va_arg_indirect_ref (src_addr);
4638
4639 dest_addr = fold_convert (addr_type, addr);
4640 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4641 size_int (INTVAL (XEXP (slot, 1)))));
4642 dest = build_va_arg_indirect_ref (dest_addr);
4643
4644 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4645 gimplify_and_add (t, pre_p);
4646 }
4647 }
4648
4649 if (needed_intregs)
4650 {
4651 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4652 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4653 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4654 gimplify_and_add (t, pre_p);
4655 }
4656 if (needed_sseregs)
4657 {
4658 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4659 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4660 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4661 gimplify_and_add (t, pre_p);
4662 }
4663
4664 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4665 gimplify_and_add (t, pre_p);
4666
4667 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4668 append_to_statement_list (t, pre_p);
4669 }
4670
4671 /* ... otherwise out of the overflow area. */
4672
4673 /* Care for on-stack alignment if needed. */
4674 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4675 || integer_zerop (TYPE_SIZE (type)))
4676 t = ovf;
4677 else
4678 {
4679 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4680 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4681 build_int_cst (TREE_TYPE (ovf), align - 1));
4682 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4683 build_int_cst (TREE_TYPE (t), -align));
4684 }
4685 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4686
4687 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4688 gimplify_and_add (t2, pre_p);
4689
4690 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4691 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4692 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4693 gimplify_and_add (t, pre_p);
4694
4695 if (container)
4696 {
4697 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4698 append_to_statement_list (t, pre_p);
4699 }
4700
4701 ptrtype = build_pointer_type (type);
4702 addr = fold_convert (ptrtype, addr);
4703
4704 if (indirect_p)
4705 addr = build_va_arg_indirect_ref (addr);
4706 return build_va_arg_indirect_ref (addr);
4707 }
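
/* Editorial sketch (not in the original sources) of the code the above
   gimplification amounts to for a plain int argument, ignoring alignment:

       if (ap->gp_offset >= 48)              /* (REGPARM_MAX - 1 + 1) * 8 */
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area += 8;
         }
       else
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       result = *(int *) addr;
 */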
4708 \f
4709 /* Return nonzero if OPNUM's MEM should be matched
4710 in movabs* patterns. */
4711
4712 int
4713 ix86_check_movabs (rtx insn, int opnum)
4714 {
4715 rtx set, mem;
4716
4717 set = PATTERN (insn);
4718 if (GET_CODE (set) == PARALLEL)
4719 set = XVECEXP (set, 0, 0);
4720 gcc_assert (GET_CODE (set) == SET);
4721 mem = XEXP (set, opnum);
4722 while (GET_CODE (mem) == SUBREG)
4723 mem = SUBREG_REG (mem);
4724 gcc_assert (GET_CODE (mem) == MEM);
4725 return (volatile_ok || !MEM_VOLATILE_P (mem));
4726 }
4727 \f
4728 /* Initialize the table of extra 80387 mathematical constants. */
4729
4730 static void
4731 init_ext_80387_constants (void)
4732 {
4733 static const char * cst[5] =
4734 {
4735 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4736 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4737 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4738 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4739 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4740 };
4741 int i;
4742
4743 for (i = 0; i < 5; i++)
4744 {
4745 real_from_string (&ext_80387_constants_table[i], cst[i]);
4746 /* Ensure each constant is rounded to XFmode precision. */
4747 real_convert (&ext_80387_constants_table[i],
4748 XFmode, &ext_80387_constants_table[i]);
4749 }
4750
4751 ext_80387_constants_init = 1;
4752 }
4753
4754 /* Return true if the constant is something that can be loaded with
4755 a special instruction. */
4756
4757 int
4758 standard_80387_constant_p (rtx x)
4759 {
4760 REAL_VALUE_TYPE r;
4761
4762 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4763 return -1;
4764
4765 if (x == CONST0_RTX (GET_MODE (x)))
4766 return 1;
4767 if (x == CONST1_RTX (GET_MODE (x)))
4768 return 2;
4769
4770 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4771
4772 /* For XFmode constants, try to find a special 80387 instruction when
4773 optimizing for size or on those CPUs that benefit from them. */
4774 if (GET_MODE (x) == XFmode
4775 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4776 {
4777 int i;
4778
4779 if (! ext_80387_constants_init)
4780 init_ext_80387_constants ();
4781
4782 for (i = 0; i < 5; i++)
4783 if (real_identical (&r, &ext_80387_constants_table[i]))
4784 return i + 3;
4785 }
4786
4787 /* Load of the constant -0.0 or -1.0 will be split as
4788 fldz;fchs or fld1;fchs sequence. */
4789 if (real_isnegzero (&r))
4790 return 8;
4791 if (real_identical (&r, &dconstm1))
4792 return 9;
4793
4794 return 0;
4795 }
4796
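/* Editorial summary (not in the original sources) of the return values of
   standard_80387_constant_p: 1 is 0.0 (fldz), 2 is 1.0 (fld1), 3..7 are the
   extended constants in the table above (fldlg2, fldln2, fldl2e, fldl2t,
   fldpi), 8 is -0.0 and 9 is -1.0 (each split into a load plus fchs), 0
   means the constant is not special, and -1 means X is not a float
   CONST_DOUBLE. standard_80387_constant_opcode below maps these codes to
   opcodes. */
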
4797 /* Return the opcode of the special instruction to be used to load
4798 the constant X. */
4799
4800 const char *
4801 standard_80387_constant_opcode (rtx x)
4802 {
4803 switch (standard_80387_constant_p (x))
4804 {
4805 case 1:
4806 return "fldz";
4807 case 2:
4808 return "fld1";
4809 case 3:
4810 return "fldlg2";
4811 case 4:
4812 return "fldln2";
4813 case 5:
4814 return "fldl2e";
4815 case 6:
4816 return "fldl2t";
4817 case 7:
4818 return "fldpi";
4819 case 8:
4820 case 9:
4821 return "#";
4822 default:
4823 gcc_unreachable ();
4824 }
4825 }
4826
4827 /* Return the CONST_DOUBLE representing the 80387 constant that is
4828 loaded by the specified special instruction. The argument IDX
4829 matches the return value from standard_80387_constant_p. */
4830
4831 rtx
4832 standard_80387_constant_rtx (int idx)
4833 {
4834 int i;
4835
4836 if (! ext_80387_constants_init)
4837 init_ext_80387_constants ();
4838
4839 switch (idx)
4840 {
4841 case 3:
4842 case 4:
4843 case 5:
4844 case 6:
4845 case 7:
4846 i = idx - 3;
4847 break;
4848
4849 default:
4850 gcc_unreachable ();
4851 }
4852
4853 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4854 XFmode);
4855 }
4856
4857 /* Return 1 if MODE is a valid mode for SSE. */
4858 static int
4859 standard_sse_mode_p (enum machine_mode mode)
4860 {
4861 switch (mode)
4862 {
4863 case V16QImode:
4864 case V8HImode:
4865 case V4SImode:
4866 case V2DImode:
4867 case V4SFmode:
4868 case V2DFmode:
4869 return 1;
4870
4871 default:
4872 return 0;
4873 }
4874 }
4875
4876 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
4877 */
4878 int
4879 standard_sse_constant_p (rtx x)
4880 {
4881 enum machine_mode mode = GET_MODE (x);
4882
4883 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4884 return 1;
4885 if (vector_all_ones_operand (x, mode)
4886 && standard_sse_mode_p (mode))
4887 return TARGET_SSE2 ? 2 : -1;
4888
4889 return 0;
4890 }
4891
4892 /* Return the opcode of the special instruction to be used to load
4893 the constant X. */
4894
4895 const char *
4896 standard_sse_constant_opcode (rtx insn, rtx x)
4897 {
4898 switch (standard_sse_constant_p (x))
4899 {
4900 case 1:
4901 if (get_attr_mode (insn) == MODE_V4SF)
4902 return "xorps\t%0, %0";
4903 else if (get_attr_mode (insn) == MODE_V2DF)
4904 return "xorpd\t%0, %0";
4905 else
4906 return "pxor\t%0, %0";
4907 case 2:
4908 return "pcmpeqd\t%0, %0";
4909 }
4910 gcc_unreachable ();
4911 }
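
/* For illustration, a zero constant in MODE_V4SF comes out as
   "xorps %xmm0, %xmm0" (xorpd or pxor for the other insn modes), and
   the all-ones vector on an SSE2 target as "pcmpeqd %xmm0, %xmm0";
   %xmm0 stands for whatever register operand 0 happens to be.  */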
4912
4913 /* Returns 1 if OP contains a symbol reference. */
4914
4915 int
4916 symbolic_reference_mentioned_p (rtx op)
4917 {
4918 const char *fmt;
4919 int i;
4920
4921 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4922 return 1;
4923
4924 fmt = GET_RTX_FORMAT (GET_CODE (op));
4925 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4926 {
4927 if (fmt[i] == 'E')
4928 {
4929 int j;
4930
4931 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4932 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4933 return 1;
4934 }
4935
4936 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4937 return 1;
4938 }
4939
4940 return 0;
4941 }
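
/* For example, (const (plus (symbol_ref "x") (const_int 4))) makes this
   return 1, while (plus (reg 0) (const_int 4)) returns 0, since the
   recursive walk only triggers on SYMBOL_REF and LABEL_REF nodes.  */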
4942
4943 /* Return 1 if it is appropriate to emit `ret' instructions in the
4944 body of a function. Do this only if the epilogue is simple, needing a
4945 couple of insns. Prior to reloading, we can't tell how many registers
4946 must be saved, so return 0 then. Return 0 if there is no frame
4947 marker to de-allocate. */
4948
4949 int
4950 ix86_can_use_return_insn_p (void)
4951 {
4952 struct ix86_frame frame;
4953
4954 if (! reload_completed || frame_pointer_needed)
4955 return 0;
4956
4957 /* Don't allow more than 32768 bytes to be popped, since that's all
4958 we can do with one instruction. */
4959 if (current_function_pops_args
4960 && current_function_args_size >= 32768)
4961 return 0;
4962
4963 ix86_compute_frame_layout (&frame);
4964 return frame.to_allocate == 0 && frame.nregs == 0;
4965 }
4966 \f
4967 /* Value should be nonzero if functions must have frame pointers.
4968 Zero means the frame pointer need not be set up (and parms may
4969 be accessed via the stack pointer) in functions that seem suitable. */
4970
4971 int
4972 ix86_frame_pointer_required (void)
4973 {
4974 /* If we accessed previous frames, then the generated code expects
4975 to be able to access the saved ebp value in our frame. */
4976 if (cfun->machine->accesses_prev_frame)
4977 return 1;
4978
4979 /* Several x86 OSes need a frame pointer for other reasons,
4980 usually pertaining to setjmp. */
4981 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4982 return 1;
4983
4984 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4985 the frame pointer by default. Turn it back on now if we've not
4986 got a leaf function. */
4987 if (TARGET_OMIT_LEAF_FRAME_POINTER
4988 && (!current_function_is_leaf
4989 || ix86_current_function_calls_tls_descriptor))
4990 return 1;
4991
4992 if (current_function_profile)
4993 return 1;
4994
4995 return 0;
4996 }
4997
4998 /* Record that the current function accesses previous call frames. */
4999
5000 void
5001 ix86_setup_frame_addresses (void)
5002 {
5003 cfun->machine->accesses_prev_frame = 1;
5004 }
5005 \f
5006 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5007 # define USE_HIDDEN_LINKONCE 1
5008 #else
5009 # define USE_HIDDEN_LINKONCE 0
5010 #endif
5011
5012 static int pic_labels_used;
5013
5014 /* Fills in the label name that should be used for a pc thunk for
5015 the given register. */
5016
5017 static void
5018 get_pc_thunk_name (char name[32], unsigned int regno)
5019 {
5020 gcc_assert (!TARGET_64BIT);
5021
5022 if (USE_HIDDEN_LINKONCE)
5023 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5024 else
5025 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5026 }
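
/* With the usual 32-bit register names this produces, for instance,
   "__i686.get_pc_thunk.bx" for the %ebx thunk when hidden linkonce
   sections are usable, and a local "LPR"-style label otherwise.  */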
5027
5028
5029 /* At the end of the file, emit the pc thunks that were used: each one
5030 loads its register with the return address of the caller and then returns. */
5031
5032 void
5033 ix86_file_end (void)
5034 {
5035 rtx xops[2];
5036 int regno;
5037
5038 for (regno = 0; regno < 8; ++regno)
5039 {
5040 char name[32];
5041
5042 if (! ((pic_labels_used >> regno) & 1))
5043 continue;
5044
5045 get_pc_thunk_name (name, regno);
5046
5047 #if TARGET_MACHO
5048 if (TARGET_MACHO)
5049 {
5050 switch_to_section (darwin_sections[text_coal_section]);
5051 fputs ("\t.weak_definition\t", asm_out_file);
5052 assemble_name (asm_out_file, name);
5053 fputs ("\n\t.private_extern\t", asm_out_file);
5054 assemble_name (asm_out_file, name);
5055 fputs ("\n", asm_out_file);
5056 ASM_OUTPUT_LABEL (asm_out_file, name);
5057 }
5058 else
5059 #endif
5060 if (USE_HIDDEN_LINKONCE)
5061 {
5062 tree decl;
5063
5064 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5065 error_mark_node);
5066 TREE_PUBLIC (decl) = 1;
5067 TREE_STATIC (decl) = 1;
5068 DECL_ONE_ONLY (decl) = 1;
5069
5070 (*targetm.asm_out.unique_section) (decl, 0);
5071 switch_to_section (get_named_section (decl, NULL, 0));
5072
5073 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5074 fputs ("\t.hidden\t", asm_out_file);
5075 assemble_name (asm_out_file, name);
5076 fputc ('\n', asm_out_file);
5077 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5078 }
5079 else
5080 {
5081 switch_to_section (text_section);
5082 ASM_OUTPUT_LABEL (asm_out_file, name);
5083 }
5084
5085 xops[0] = gen_rtx_REG (SImode, regno);
5086 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5087 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5088 output_asm_insn ("ret", xops);
5089 }
5090
5091 if (NEED_INDICATE_EXEC_STACK)
5092 file_end_indicate_exec_stack ();
5093 }
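
/* Each emitted thunk body is just a load of the return address followed
   by a return; for the %ebx thunk it comes out roughly as

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret
*/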
5094
5095 /* Emit code for the SET_GOT patterns. */
5096
5097 const char *
5098 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5099 {
5100 rtx xops[3];
5101
5102 xops[0] = dest;
5103 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5104
5105 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5106 {
5107 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5108
5109 if (!flag_pic)
5110 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5111 else
5112 output_asm_insn ("call\t%a2", xops);
5113
5114 #if TARGET_MACHO
5115 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5116 is what will be referenced by the Mach-O PIC subsystem. */
5117 if (!label)
5118 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5119 #endif
5120
5121 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5122 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5123
5124 if (flag_pic)
5125 output_asm_insn ("pop{l}\t%0", xops);
5126 }
5127 else
5128 {
5129 char name[32];
5130 get_pc_thunk_name (name, REGNO (dest));
5131 pic_labels_used |= 1 << REGNO (dest);
5132
5133 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5134 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5135 output_asm_insn ("call\t%X2", xops);
5136 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5137 is what will be referenced by the Mach-O PIC subsystem. */
5138 #if TARGET_MACHO
5139 if (!label)
5140 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5141 else
5142 targetm.asm_out.internal_label (asm_out_file, "L",
5143 CODE_LABEL_NUMBER (label));
5144 #endif
5145 }
5146
5147 if (TARGET_MACHO)
5148 return "";
5149
5150 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5151 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5152 else
5153 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5154
5155 return "";
5156 }
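
/* Roughly, the two sequences produced above look as follows (shown for
   %ebx; ".L2" stands for whatever internal label gets generated). With
   deep branch prediction, using the pc thunk:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   and otherwise the classic call/pop sequence:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
*/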
5157
5158 /* Generate a "push" pattern for input ARG. */
5159
5160 static rtx
5161 gen_push (rtx arg)
5162 {
5163 return gen_rtx_SET (VOIDmode,
5164 gen_rtx_MEM (Pmode,
5165 gen_rtx_PRE_DEC (Pmode,
5166 stack_pointer_rtx)),
5167 arg);
5168 }
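
/* The RTL built here is just a pre-decrement store through the stack
   pointer, e.g. (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))
   for a 32-bit register push.  */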
5169
5170 /* Return the regno of an unused call-clobbered register available
5171 for the entire function, or INVALID_REGNUM if there is none. */
5172
5173 static unsigned int
5174 ix86_select_alt_pic_regnum (void)
5175 {
5176 if (current_function_is_leaf && !current_function_profile
5177 && !ix86_current_function_calls_tls_descriptor)
5178 {
5179 int i;
5180 for (i = 2; i >= 0; --i)
5181 if (!regs_ever_live[i])
5182 return i;
5183 }
5184
5185 return INVALID_REGNUM;
5186 }
5187
5188 /* Return 1 if we need to save REGNO. */
5189 static int
5190 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5191 {
5192 if (pic_offset_table_rtx
5193 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5194 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5195 || current_function_profile
5196 || current_function_calls_eh_return
5197 || current_function_uses_const_pool))
5198 {
5199 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5200 return 0;
5201 return 1;
5202 }
5203
5204 if (current_function_calls_eh_return && maybe_eh_return)
5205 {
5206 unsigned i;
5207 for (i = 0; ; i++)
5208 {
5209 unsigned test = EH_RETURN_DATA_REGNO (i);
5210 if (test == INVALID_REGNUM)
5211 break;
5212 if (test == regno)
5213 return 1;
5214 }
5215 }
5216
5217 if (cfun->machine->force_align_arg_pointer
5218 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5219 return 1;
5220
5221 return (regs_ever_live[regno]
5222 && !call_used_regs[regno]
5223 && !fixed_regs[regno]
5224 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5225 }
5226
5227 /* Return number of registers to be saved on the stack. */
5228
5229 static int
5230 ix86_nsaved_regs (void)
5231 {
5232 int nregs = 0;
5233 int regno;
5234
5235 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5236 if (ix86_save_reg (regno, true))
5237 nregs++;
5238 return nregs;
5239 }
5240
5241 /* Return the offset between two registers, one to be eliminated, and the other
5242 its replacement, at the start of a routine. */
5243
5244 HOST_WIDE_INT
5245 ix86_initial_elimination_offset (int from, int to)
5246 {
5247 struct ix86_frame frame;
5248 ix86_compute_frame_layout (&frame);
5249
5250 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5251 return frame.hard_frame_pointer_offset;
5252 else if (from == FRAME_POINTER_REGNUM
5253 && to == HARD_FRAME_POINTER_REGNUM)
5254 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5255 else
5256 {
5257 gcc_assert (to == STACK_POINTER_REGNUM);
5258
5259 if (from == ARG_POINTER_REGNUM)
5260 return frame.stack_pointer_offset;
5261
5262 gcc_assert (from == FRAME_POINTER_REGNUM);
5263 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5264 }
5265 }
5266
5267 /* Fill the structure ix86_frame with the frame layout of the function currently being compiled. */
5268
5269 static void
5270 ix86_compute_frame_layout (struct ix86_frame *frame)
5271 {
5272 HOST_WIDE_INT total_size;
5273 unsigned int stack_alignment_needed;
5274 HOST_WIDE_INT offset;
5275 unsigned int preferred_alignment;
5276 HOST_WIDE_INT size = get_frame_size ();
5277
5278 frame->nregs = ix86_nsaved_regs ();
5279 total_size = size;
5280
5281 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5282 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5283
5284 /* During reload iteration the number of registers saved can change.
5285 Recompute the value as needed. Do not recompute when the number of
5286 registers didn't change, as reload calls this function multiple times
5287 and does not expect the decision to change within a single iteration. */
5288 if (!optimize_size
5289 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5290 {
5291 int count = frame->nregs;
5292
5293 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5294 /* The fast prologue uses move instead of push to save registers. This
5295 is significantly longer, but also executes faster as modern hardware
5296 can execute the moves in parallel, but can't do that for push/pop.
5297
5298 Be careful about choosing which prologue to emit: when the function
5299 takes many instructions to execute, we may as well use the slow version,
5300 likewise when the function is known to be outside a hot spot (this is
5301 known with profile feedback only). Weight the size of the function by the
5302 number of registers to save, as it is cheap to use one or two push
5303 instructions but very slow to use many of them. */
5304 if (count)
5305 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5306 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5307 || (flag_branch_probabilities
5308 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5309 cfun->machine->use_fast_prologue_epilogue = false;
5310 else
5311 cfun->machine->use_fast_prologue_epilogue
5312 = !expensive_function_p (count);
5313 }
5314 if (TARGET_PROLOGUE_USING_MOVE
5315 && cfun->machine->use_fast_prologue_epilogue)
5316 frame->save_regs_using_mov = true;
5317 else
5318 frame->save_regs_using_mov = false;
5319
5320
5321 /* Skip return address and saved base pointer. */
5322 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5323
5324 frame->hard_frame_pointer_offset = offset;
5325
5326 /* Do some sanity checking of stack_alignment_needed and
5327 preferred_alignment, since the i386 port is the only one using these
5328 features and they may break easily. */
5329
5330 gcc_assert (!size || stack_alignment_needed);
5331 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5332 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5333 gcc_assert (stack_alignment_needed
5334 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5335
5336 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5337 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5338
5339 /* Register save area */
5340 offset += frame->nregs * UNITS_PER_WORD;
5341
5342 /* Va-arg area */
5343 if (ix86_save_varrargs_registers)
5344 {
5345 offset += X86_64_VARARGS_SIZE;
5346 frame->va_arg_size = X86_64_VARARGS_SIZE;
5347 }
5348 else
5349 frame->va_arg_size = 0;
5350
5351 /* Align start of frame for local function. */
5352 frame->padding1 = ((offset + stack_alignment_needed - 1)
5353 & -stack_alignment_needed) - offset;
5354
5355 offset += frame->padding1;
5356
5357 /* Frame pointer points here. */
5358 frame->frame_pointer_offset = offset;
5359
5360 offset += size;
5361
5362 /* Add the outgoing arguments area. It can be skipped if we eliminated
5363 all the function calls as dead code.
5364 Skipping is however impossible when the function calls alloca, as the
5365 alloca expander assumes that the last current_function_outgoing_args_size
5366 bytes of the stack frame are unused. */
5367 if (ACCUMULATE_OUTGOING_ARGS
5368 && (!current_function_is_leaf || current_function_calls_alloca
5369 || ix86_current_function_calls_tls_descriptor))
5370 {
5371 offset += current_function_outgoing_args_size;
5372 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5373 }
5374 else
5375 frame->outgoing_arguments_size = 0;
5376
5377 /* Align stack boundary. Only needed if we're calling another function
5378 or using alloca. */
5379 if (!current_function_is_leaf || current_function_calls_alloca
5380 || ix86_current_function_calls_tls_descriptor)
5381 frame->padding2 = ((offset + preferred_alignment - 1)
5382 & -preferred_alignment) - offset;
5383 else
5384 frame->padding2 = 0;
5385
5386 offset += frame->padding2;
5387
5388 /* We've reached end of stack frame. */
5389 frame->stack_pointer_offset = offset;
5390
5391 /* Size prologue needs to allocate. */
5392 frame->to_allocate =
5393 (size + frame->padding1 + frame->padding2
5394 + frame->outgoing_arguments_size + frame->va_arg_size);
5395
5396 if ((!frame->to_allocate && frame->nregs <= 1)
5397 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5398 frame->save_regs_using_mov = false;
5399
5400 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5401 && current_function_is_leaf
5402 && !ix86_current_function_calls_tls_descriptor)
5403 {
5404 frame->red_zone_size = frame->to_allocate;
5405 if (frame->save_regs_using_mov)
5406 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5407 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5408 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5409 }
5410 else
5411 frame->red_zone_size = 0;
5412 frame->to_allocate -= frame->red_zone_size;
5413 frame->stack_pointer_offset -= frame->red_zone_size;
5414 #if 0
5415 fprintf (stderr, "nregs: %i\n", frame->nregs);
5416 fprintf (stderr, "size: %i\n", size);
5417 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5418 fprintf (stderr, "padding1: %i\n", frame->padding1);
5419 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5420 fprintf (stderr, "padding2: %i\n", frame->padding2);
5421 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5422 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5423 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5424 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5425 frame->hard_frame_pointer_offset);
5426 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5427 #endif
5428 }
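
/* A worked example with made-up numbers: a 32-bit leaf function with a
   frame pointer, two call-saved registers, 20 bytes of locals and the
   default 4-byte stack alignment lays out as

	hard_frame_pointer_offset = 8	(return address + saved %ebp)
	register save area	  = 8	(2 * UNITS_PER_WORD)
	padding1		  = 0
	frame_pointer_offset	  = 16
	locals			  = 20
	padding2		  = 0	(leaf, no outgoing args)
	stack_pointer_offset	  = 36
	to_allocate		  = 20

   so the prologue only has to allocate the locals; the register saves
   are accounted for by the pushes themselves.  */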
5429
5430 /* Emit code to save registers in the prologue. */
5431
5432 static void
5433 ix86_emit_save_regs (void)
5434 {
5435 unsigned int regno;
5436 rtx insn;
5437
5438 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5439 if (ix86_save_reg (regno, true))
5440 {
5441 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5442 RTX_FRAME_RELATED_P (insn) = 1;
5443 }
5444 }
5445
5446 /* Emit code to save registers using MOV insns. First register
5447 is saved at POINTER + OFFSET. */
5448 static void
5449 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5450 {
5451 unsigned int regno;
5452 rtx insn;
5453
5454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5455 if (ix86_save_reg (regno, true))
5456 {
5457 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5458 Pmode, offset),
5459 gen_rtx_REG (Pmode, regno));
5460 RTX_FRAME_RELATED_P (insn) = 1;
5461 offset += UNITS_PER_WORD;
5462 }
5463 }
5464
5465 /* Expand prologue or epilogue stack adjustment.
5466 The pattern exists to put a dependency on all ebp-based memory accesses.
5467 STYLE should be negative if instructions should be marked as frame related,
5468 zero if the %r11 register is live and cannot be freely used, and positive
5469 otherwise. */
5470
5471 static void
5472 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5473 {
5474 rtx insn;
5475
5476 if (! TARGET_64BIT)
5477 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5478 else if (x86_64_immediate_operand (offset, DImode))
5479 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5480 else
5481 {
5482 rtx r11;
5483 /* r11 is used by indirect sibcall return as well, set before the
5484 epilogue and used after the epilogue. ATM indirect sibcall
5485 shouldn't be used together with huge frame sizes in one
5486 function because of the frame_size check in sibcall.c. */
5487 gcc_assert (style);
5488 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5489 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5490 if (style < 0)
5491 RTX_FRAME_RELATED_P (insn) = 1;
5492 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5493 offset));
5494 }
5495 if (style < 0)
5496 RTX_FRAME_RELATED_P (insn) = 1;
5497 }
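
/* A sketch of the last case above: on x86_64, an adjustment that does
   not fit in a signed 32-bit immediate is first materialized in %r11
   and then added to the stack pointer, conceptually

	movabsq	$0x123456789, %r11
	addq	%r11, %rsp

   (the constant is an arbitrary example; the exact insn chosen for the
   addition comes from the pro_epilogue_adjust_stack_rex64_2 pattern).  */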
5498
5499 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5500
5501 static rtx
5502 ix86_internal_arg_pointer (void)
5503 {
5504 bool has_force_align_arg_pointer =
5505 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5506 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5507 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5508 && DECL_NAME (current_function_decl)
5509 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5510 && DECL_FILE_SCOPE_P (current_function_decl))
5511 || ix86_force_align_arg_pointer
5512 || has_force_align_arg_pointer)
5513 {
5514 /* Nested functions can't realign the stack due to a register
5515 conflict. */
5516 if (DECL_CONTEXT (current_function_decl)
5517 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5518 {
5519 if (ix86_force_align_arg_pointer)
5520 warning (0, "-mstackrealign ignored for nested functions");
5521 if (has_force_align_arg_pointer)
5522 error ("%s not supported for nested functions",
5523 ix86_force_align_arg_pointer_string);
5524 return virtual_incoming_args_rtx;
5525 }
5526 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5527 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5528 }
5529 else
5530 return virtual_incoming_args_rtx;
5531 }
5532
5533 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5534 This is called from dwarf2out.c to emit call frame instructions
5535 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5536 static void
5537 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5538 {
5539 rtx unspec = SET_SRC (pattern);
5540 gcc_assert (GET_CODE (unspec) == UNSPEC);
5541
5542 switch (index)
5543 {
5544 case UNSPEC_REG_SAVE:
5545 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5546 SET_DEST (pattern));
5547 break;
5548 case UNSPEC_DEF_CFA:
5549 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5550 INTVAL (XVECEXP (unspec, 0, 0)));
5551 break;
5552 default:
5553 gcc_unreachable ();
5554 }
5555 }
5556
5557 /* Expand the prologue into a bunch of separate insns. */
5558
5559 void
5560 ix86_expand_prologue (void)
5561 {
5562 rtx insn;
5563 bool pic_reg_used;
5564 struct ix86_frame frame;
5565 HOST_WIDE_INT allocate;
5566
5567 ix86_compute_frame_layout (&frame);
5568
5569 if (cfun->machine->force_align_arg_pointer)
5570 {
5571 rtx x, y;
5572
5573 /* Grab the argument pointer. */
5574 x = plus_constant (stack_pointer_rtx, 4);
5575 y = cfun->machine->force_align_arg_pointer;
5576 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5577 RTX_FRAME_RELATED_P (insn) = 1;
5578
5579 /* The unwind info consists of two parts: install the fafp as the cfa,
5580 and record the fafp as the "save register" of the stack pointer.
5581 The latter is there so that the unwinder can see where it
5582 should restore the stack pointer across the "and" insn. */
5583 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5584 x = gen_rtx_SET (VOIDmode, y, x);
5585 RTX_FRAME_RELATED_P (x) = 1;
5586 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5587 UNSPEC_REG_SAVE);
5588 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5589 RTX_FRAME_RELATED_P (y) = 1;
5590 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5591 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5592 REG_NOTES (insn) = x;
5593
5594 /* Align the stack. */
5595 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5596 GEN_INT (-16)));
5597
5598 /* And here we cheat like madmen with the unwind info. We force the
5599 cfa register back to sp+4, which is exactly what it was at the
5600 start of the function. Re-pushing the return address results in
5601 the return at the same spot relative to the cfa, and thus is
5602 correct wrt the unwind info. */
5603 x = cfun->machine->force_align_arg_pointer;
5604 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5605 insn = emit_insn (gen_push (x));
5606 RTX_FRAME_RELATED_P (insn) = 1;
5607
5608 x = GEN_INT (4);
5609 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5610 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5611 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5612 REG_NOTES (insn) = x;
5613 }
5614
5615 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5616 slower on all targets. Also sdb doesn't like it. */
5617
5618 if (frame_pointer_needed)
5619 {
5620 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5621 RTX_FRAME_RELATED_P (insn) = 1;
5622
5623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5624 RTX_FRAME_RELATED_P (insn) = 1;
5625 }
5626
5627 allocate = frame.to_allocate;
5628
5629 if (!frame.save_regs_using_mov)
5630 ix86_emit_save_regs ();
5631 else
5632 allocate += frame.nregs * UNITS_PER_WORD;
5633
5634 /* When using the red zone we may start register saving before allocating
5635 the stack frame, saving one cycle of the prologue. */
5636 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5637 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5638 : stack_pointer_rtx,
5639 -frame.nregs * UNITS_PER_WORD);
5640
5641 if (allocate == 0)
5642 ;
5643 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5644 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5645 GEN_INT (-allocate), -1);
5646 else
5647 {
5648 /* Only valid for Win32. */
5649 rtx eax = gen_rtx_REG (SImode, 0);
5650 bool eax_live = ix86_eax_live_at_start_p ();
5651 rtx t;
5652
5653 gcc_assert (!TARGET_64BIT);
5654
5655 if (eax_live)
5656 {
5657 emit_insn (gen_push (eax));
5658 allocate -= 4;
5659 }
5660
5661 emit_move_insn (eax, GEN_INT (allocate));
5662
5663 insn = emit_insn (gen_allocate_stack_worker (eax));
5664 RTX_FRAME_RELATED_P (insn) = 1;
5665 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5666 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5667 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5668 t, REG_NOTES (insn));
5669
5670 if (eax_live)
5671 {
5672 if (frame_pointer_needed)
5673 t = plus_constant (hard_frame_pointer_rtx,
5674 allocate
5675 - frame.to_allocate
5676 - frame.nregs * UNITS_PER_WORD);
5677 else
5678 t = plus_constant (stack_pointer_rtx, allocate);
5679 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5680 }
5681 }
5682
5683 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5684 {
5685 if (!frame_pointer_needed || !frame.to_allocate)
5686 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5687 else
5688 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5689 -frame.nregs * UNITS_PER_WORD);
5690 }
5691
5692 pic_reg_used = false;
5693 if (pic_offset_table_rtx
5694 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5695 || current_function_profile))
5696 {
5697 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5698
5699 if (alt_pic_reg_used != INVALID_REGNUM)
5700 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5701
5702 pic_reg_used = true;
5703 }
5704
5705 if (pic_reg_used)
5706 {
5707 if (TARGET_64BIT)
5708 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5709 else
5710 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5711
5712 /* Even with accurate pre-reload life analysis, we can wind up
5713 deleting all references to the pic register after reload.
5714 Consider if cross-jumping unifies two sides of a branch
5715 controlled by a comparison vs the only read from a global.
5716 In which case, allow the set_got to be deleted, though we're
5717 too late to do anything about the ebx save in the prologue. */
5718 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5719 }
5720
5721 /* Prevent function calls from being scheduled before the call to mcount.
5722 In the pic_reg_used case, make sure that the got load isn't deleted. */
5723 if (current_function_profile)
5724 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5725 }
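
/* Pulling it together, the common frame-pointer prologue of a small
   32-bit function comes out roughly as

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx		# call-saved regs, when not saved via moves
	subl	$N, %esp	# N = frame.to_allocate

   with the PIC register set up afterwards when the GOT is needed; the
   exact shape depends on the heuristics in ix86_compute_frame_layout.  */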
5726
5727 /* Emit code to restore saved registers using MOV insns. First register
5728 is restored from POINTER + OFFSET. */
5729 static void
5730 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5731 int maybe_eh_return)
5732 {
5733 int regno;
5734 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5735
5736 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5737 if (ix86_save_reg (regno, maybe_eh_return))
5738 {
5739 /* Ensure that adjust_address won't be forced to produce a pointer
5740 outside the range allowed by the x86-64 instruction set. */
5741 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5742 {
5743 rtx r11;
5744
5745 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5746 emit_move_insn (r11, GEN_INT (offset));
5747 emit_insn (gen_adddi3 (r11, r11, pointer));
5748 base_address = gen_rtx_MEM (Pmode, r11);
5749 offset = 0;
5750 }
5751 emit_move_insn (gen_rtx_REG (Pmode, regno),
5752 adjust_address (base_address, Pmode, offset));
5753 offset += UNITS_PER_WORD;
5754 }
5755 }
5756
5757 /* Restore function stack, frame, and registers. */
5758
5759 void
5760 ix86_expand_epilogue (int style)
5761 {
5762 int regno;
5763 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5764 struct ix86_frame frame;
5765 HOST_WIDE_INT offset;
5766
5767 ix86_compute_frame_layout (&frame);
5768
5769 /* Calculate start of saved registers relative to ebp. Special care
5770 must be taken for the normal return case of a function using
5771 eh_return: the eax and edx registers are marked as saved, but not
5772 restored along this path. */
5773 offset = frame.nregs;
5774 if (current_function_calls_eh_return && style != 2)
5775 offset -= 2;
5776 offset *= -UNITS_PER_WORD;
5777
5778 /* If we're only restoring one register and sp is not valid then
5779 use a move instruction to restore the register, since it's
5780 less work than reloading sp and popping the register.
5781
5782 The default code results in a stack adjustment using an add/lea instruction,
5783 while this code results in a LEAVE instruction (or discrete equivalent),
5784 so it is profitable in some other cases as well, especially when there
5785 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5786 and there is exactly one register to pop. This heuristic may need some
5787 tuning in the future. */
5788 if ((!sp_valid && frame.nregs <= 1)
5789 || (TARGET_EPILOGUE_USING_MOVE
5790 && cfun->machine->use_fast_prologue_epilogue
5791 && (frame.nregs > 1 || frame.to_allocate))
5792 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5793 || (frame_pointer_needed && TARGET_USE_LEAVE
5794 && cfun->machine->use_fast_prologue_epilogue
5795 && frame.nregs == 1)
5796 || current_function_calls_eh_return)
5797 {
5798 /* Restore registers. We can use ebp or esp to address the memory
5799 locations. If both are available, default to ebp, since offsets
5800 are known to be small. The only exception is esp pointing directly to
5801 the end of the block of saved registers, where we may simplify the
5802 addressing mode. */
5803
5804 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5805 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5806 frame.to_allocate, style == 2);
5807 else
5808 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5809 offset, style == 2);
5810
5811 /* eh_return epilogues need %ecx added to the stack pointer. */
5812 if (style == 2)
5813 {
5814 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5815
5816 if (frame_pointer_needed)
5817 {
5818 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5819 tmp = plus_constant (tmp, UNITS_PER_WORD);
5820 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5821
5822 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5823 emit_move_insn (hard_frame_pointer_rtx, tmp);
5824
5825 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5826 const0_rtx, style);
5827 }
5828 else
5829 {
5830 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5831 tmp = plus_constant (tmp, (frame.to_allocate
5832 + frame.nregs * UNITS_PER_WORD));
5833 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5834 }
5835 }
5836 else if (!frame_pointer_needed)
5837 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5838 GEN_INT (frame.to_allocate
5839 + frame.nregs * UNITS_PER_WORD),
5840 style);
5841 /* If not an i386, mov & pop is faster than "leave". */
5842 else if (TARGET_USE_LEAVE || optimize_size
5843 || !cfun->machine->use_fast_prologue_epilogue)
5844 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5845 else
5846 {
5847 pro_epilogue_adjust_stack (stack_pointer_rtx,
5848 hard_frame_pointer_rtx,
5849 const0_rtx, style);
5850 if (TARGET_64BIT)
5851 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5852 else
5853 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5854 }
5855 }
5856 else
5857 {
5858 /* First step is to deallocate the stack frame so that we can
5859 pop the registers. */
5860 if (!sp_valid)
5861 {
5862 gcc_assert (frame_pointer_needed);
5863 pro_epilogue_adjust_stack (stack_pointer_rtx,
5864 hard_frame_pointer_rtx,
5865 GEN_INT (offset), style);
5866 }
5867 else if (frame.to_allocate)
5868 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5869 GEN_INT (frame.to_allocate), style);
5870
5871 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5872 if (ix86_save_reg (regno, false))
5873 {
5874 if (TARGET_64BIT)
5875 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5876 else
5877 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5878 }
5879 if (frame_pointer_needed)
5880 {
5881 /* Leave results in shorter dependency chains on CPUs that are
5882 able to grok it fast. */
5883 if (TARGET_USE_LEAVE)
5884 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5885 else if (TARGET_64BIT)
5886 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5887 else
5888 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5889 }
5890 }
5891
5892 if (cfun->machine->force_align_arg_pointer)
5893 {
5894 emit_insn (gen_addsi3 (stack_pointer_rtx,
5895 cfun->machine->force_align_arg_pointer,
5896 GEN_INT (-4)));
5897 }
5898
5899 /* Sibcall epilogues don't want a return instruction. */
5900 if (style == 0)
5901 return;
5902
5903 if (current_function_pops_args && current_function_args_size)
5904 {
5905 rtx popc = GEN_INT (current_function_pops_args);
5906
5907 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5908 return address, do an explicit add, and jump indirectly to the
5909 caller. */
5910
5911 if (current_function_pops_args >= 65536)
5912 {
5913 rtx ecx = gen_rtx_REG (SImode, 2);
5914
5915 /* There is no "pascal" calling convention in 64bit ABI. */
5916 gcc_assert (!TARGET_64BIT);
5917
5918 emit_insn (gen_popsi1 (ecx));
5919 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5920 emit_jump_insn (gen_return_indirect_internal (ecx));
5921 }
5922 else
5923 emit_jump_insn (gen_return_pop_internal (popc));
5924 }
5925 else
5926 emit_jump_insn (gen_return_internal ());
5927 }
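
/* Correspondingly, the usual epilogues boil down either to restoring
   registers via moves and then

	leave
	ret

   or to the pop path

	addl	$N, %esp
	popl	%ebx
	popl	%ebp
	ret

   with "ret $n" at the end when the function pops its own arguments.
   This is only a sketch; the heuristics above pick the exact form.  */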
5928
5929 /* Reset state from the function's potential modifications (such as a changed PIC register number). */
5930
5931 static void
5932 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5933 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5934 {
5935 if (pic_offset_table_rtx)
5936 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5937 #if TARGET_MACHO
5938 /* Mach-O doesn't support labels at the end of objects, so if
5939 it looks like we might want one, insert a NOP. */
5940 {
5941 rtx insn = get_last_insn ();
5942 while (insn
5943 && NOTE_P (insn)
5944 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5945 insn = PREV_INSN (insn);
5946 if (insn
5947 && (LABEL_P (insn)
5948 || (NOTE_P (insn)
5949 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5950 fputs ("\tnop\n", file);
5951 }
5952 #endif
5953
5954 }
5955 \f
5956 /* Extract the parts of an RTL expression that is a valid memory address
5957 for an instruction. Return 0 if the structure of the address is
5958 grossly off. Return -1 if the address contains ASHIFT, so it is not
5959 strictly valid, but is still used for computing the length of a lea instruction. */
5960
5961 int
5962 ix86_decompose_address (rtx addr, struct ix86_address *out)
5963 {
5964 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5965 rtx base_reg, index_reg;
5966 HOST_WIDE_INT scale = 1;
5967 rtx scale_rtx = NULL_RTX;
5968 int retval = 1;
5969 enum ix86_address_seg seg = SEG_DEFAULT;
5970
5971 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5972 base = addr;
5973 else if (GET_CODE (addr) == PLUS)
5974 {
5975 rtx addends[4], op;
5976 int n = 0, i;
5977
5978 op = addr;
5979 do
5980 {
5981 if (n >= 4)
5982 return 0;
5983 addends[n++] = XEXP (op, 1);
5984 op = XEXP (op, 0);
5985 }
5986 while (GET_CODE (op) == PLUS);
5987 if (n >= 4)
5988 return 0;
5989 addends[n] = op;
5990
5991 for (i = n; i >= 0; --i)
5992 {
5993 op = addends[i];
5994 switch (GET_CODE (op))
5995 {
5996 case MULT:
5997 if (index)
5998 return 0;
5999 index = XEXP (op, 0);
6000 scale_rtx = XEXP (op, 1);
6001 break;
6002
6003 case UNSPEC:
6004 if (XINT (op, 1) == UNSPEC_TP
6005 && TARGET_TLS_DIRECT_SEG_REFS
6006 && seg == SEG_DEFAULT)
6007 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6008 else
6009 return 0;
6010 break;
6011
6012 case REG:
6013 case SUBREG:
6014 if (!base)
6015 base = op;
6016 else if (!index)
6017 index = op;
6018 else
6019 return 0;
6020 break;
6021
6022 case CONST:
6023 case CONST_INT:
6024 case SYMBOL_REF:
6025 case LABEL_REF:
6026 if (disp)
6027 return 0;
6028 disp = op;
6029 break;
6030
6031 default:
6032 return 0;
6033 }
6034 }
6035 }
6036 else if (GET_CODE (addr) == MULT)
6037 {
6038 index = XEXP (addr, 0); /* index*scale */
6039 scale_rtx = XEXP (addr, 1);
6040 }
6041 else if (GET_CODE (addr) == ASHIFT)
6042 {
6043 rtx tmp;
6044
6045 /* We're called for lea too, which implements ashift on occasion. */
6046 index = XEXP (addr, 0);
6047 tmp = XEXP (addr, 1);
6048 if (GET_CODE (tmp) != CONST_INT)
6049 return 0;
6050 scale = INTVAL (tmp);
6051 if ((unsigned HOST_WIDE_INT) scale > 3)
6052 return 0;
6053 scale = 1 << scale;
6054 retval = -1;
6055 }
6056 else
6057 disp = addr; /* displacement */
6058
6059 /* Extract the integral value of scale. */
6060 if (scale_rtx)
6061 {
6062 if (GET_CODE (scale_rtx) != CONST_INT)
6063 return 0;
6064 scale = INTVAL (scale_rtx);
6065 }
6066
6067 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6068 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6069
6070 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6071 if (base_reg && index_reg && scale == 1
6072 && (index_reg == arg_pointer_rtx
6073 || index_reg == frame_pointer_rtx
6074 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6075 {
6076 rtx tmp;
6077 tmp = base, base = index, index = tmp;
6078 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6079 }
6080
6081 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6082 if ((base_reg == hard_frame_pointer_rtx
6083 || base_reg == frame_pointer_rtx
6084 || base_reg == arg_pointer_rtx) && !disp)
6085 disp = const0_rtx;
6086
6087 /* Special case: on K6, a bare [%esi] forces the instruction to be vector decoded.
6088 Avoid this by transforming to [%esi+0]. */
6089 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6090 && base_reg && !index_reg && !disp
6091 && REG_P (base_reg)
6092 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6093 disp = const0_rtx;
6094
6095 /* Special case: encode reg+reg instead of reg*2. */
6096 if (!base && index && scale && scale == 2)
6097 base = index, base_reg = index_reg, scale = 1;
6098
6099 /* Special case: scaling cannot be encoded without base or displacement. */
6100 if (!base && !disp && index && scale != 1)
6101 disp = const0_rtx;
6102
6103 out->base = base;
6104 out->index = index;
6105 out->disp = disp;
6106 out->scale = scale;
6107 out->seg = seg;
6108
6109 return retval;
6110 }
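
/* For example, an address such as

	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   decomposes into base = A, index = B, scale = 4, disp = 12, which in
   AT&T syntax is written 12(%eax,%ebx,4) when A is %eax and B is %ebx.  */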
6111 \f
6112 /* Return the cost of the memory address X.
6113 For i386, it is better to use a complex address than to let gcc copy
6114 the address into a reg and make a new pseudo. But not if the address
6115 requires two regs - that would mean more pseudos with longer
6116 lifetimes. */
6117 static int
6118 ix86_address_cost (rtx x)
6119 {
6120 struct ix86_address parts;
6121 int cost = 1;
6122 int ok = ix86_decompose_address (x, &parts);
6123
6124 gcc_assert (ok);
6125
6126 if (parts.base && GET_CODE (parts.base) == SUBREG)
6127 parts.base = SUBREG_REG (parts.base);
6128 if (parts.index && GET_CODE (parts.index) == SUBREG)
6129 parts.index = SUBREG_REG (parts.index);
6130
6131 /* More complex memory references are better. */
6132 if (parts.disp && parts.disp != const0_rtx)
6133 cost--;
6134 if (parts.seg != SEG_DEFAULT)
6135 cost--;
6136
6137 /* Attempt to minimize number of registers in the address. */
6138 if ((parts.base
6139 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6140 || (parts.index
6141 && (!REG_P (parts.index)
6142 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6143 cost++;
6144
6145 if (parts.base
6146 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6147 && parts.index
6148 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6149 && parts.base != parts.index)
6150 cost++;
6151
6152 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6153 since its predecode logic can't detect the length of such instructions
6154 and decoding degenerates to vector decoded. Increase the cost of such
6155 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6156 to split such addresses or even to refuse them entirely.
6157
6158 The following addressing modes are affected:
6159 [base+scale*index]
6160 [scale*index+disp]
6161 [base+index]
6162
6163 The first and last cases may be avoidable by explicitly coding a zero
6164 displacement in the memory address, but I don't have an AMD-K6 machine
6165 handy to check this theory. */
6166
6167 if (TARGET_K6
6168 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6169 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6170 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6171 cost += 10;
6172
6173 return cost;
6174 }
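
/* For instance, an address with a displacement such as 4(%eax,%ebx,2)
   avoids the K6 penalty above, while (%eax,%ebx) or (%eax,%ebx,2)
   matches one of the listed forms and picks up the extra cost of 10.  */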
6175 \f
6176 /* If X is a machine specific address (i.e. a symbol or label being
6177 referenced as a displacement from the GOT implemented using an
6178 UNSPEC), then return the base term. Otherwise return X. */
6179
6180 rtx
6181 ix86_find_base_term (rtx x)
6182 {
6183 rtx term;
6184
6185 if (TARGET_64BIT)
6186 {
6187 if (GET_CODE (x) != CONST)
6188 return x;
6189 term = XEXP (x, 0);
6190 if (GET_CODE (term) == PLUS
6191 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6192 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6193 term = XEXP (term, 0);
6194 if (GET_CODE (term) != UNSPEC
6195 || XINT (term, 1) != UNSPEC_GOTPCREL)
6196 return x;
6197
6198 term = XVECEXP (term, 0, 0);
6199
6200 if (GET_CODE (term) != SYMBOL_REF
6201 && GET_CODE (term) != LABEL_REF)
6202 return x;
6203
6204 return term;
6205 }
6206
6207 term = ix86_delegitimize_address (x);
6208
6209 if (GET_CODE (term) != SYMBOL_REF
6210 && GET_CODE (term) != LABEL_REF)
6211 return x;
6212
6213 return term;
6214 }
6215
6216 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6217 this is used to form addresses to local data when -fPIC is in
6218 use. */
6219
6220 static bool
6221 darwin_local_data_pic (rtx disp)
6222 {
6223 if (GET_CODE (disp) == MINUS)
6224 {
6225 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6226 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6227 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6228 {
6229 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6230 if (! strcmp (sym_name, "<pic base>"))
6231 return true;
6232 }
6233 }
6234
6235 return false;
6236 }
6237 \f
6238 /* Determine if a given RTX is a valid constant. We already know this
6239 satisfies CONSTANT_P. */
6240
6241 bool
6242 legitimate_constant_p (rtx x)
6243 {
6244 switch (GET_CODE (x))
6245 {
6246 case CONST:
6247 x = XEXP (x, 0);
6248
6249 if (GET_CODE (x) == PLUS)
6250 {
6251 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6252 return false;
6253 x = XEXP (x, 0);
6254 }
6255
6256 if (TARGET_MACHO && darwin_local_data_pic (x))
6257 return true;
6258
6259 /* Only some unspecs are valid as "constants". */
6260 if (GET_CODE (x) == UNSPEC)
6261 switch (XINT (x, 1))
6262 {
6263 case UNSPEC_GOTOFF:
6264 return TARGET_64BIT;
6265 case UNSPEC_TPOFF:
6266 case UNSPEC_NTPOFF:
6267 x = XVECEXP (x, 0, 0);
6268 return (GET_CODE (x) == SYMBOL_REF
6269 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6270 case UNSPEC_DTPOFF:
6271 x = XVECEXP (x, 0, 0);
6272 return (GET_CODE (x) == SYMBOL_REF
6273 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6274 default:
6275 return false;
6276 }
6277
6278 /* We must have drilled down to a symbol. */
6279 if (GET_CODE (x) == LABEL_REF)
6280 return true;
6281 if (GET_CODE (x) != SYMBOL_REF)
6282 return false;
6283 /* FALLTHRU */
6284
6285 case SYMBOL_REF:
6286 /* TLS symbols are never valid. */
6287 if (SYMBOL_REF_TLS_MODEL (x))
6288 return false;
6289 break;
6290
6291 case CONST_DOUBLE:
6292 if (GET_MODE (x) == TImode
6293 && x != CONST0_RTX (TImode)
6294 && !TARGET_64BIT)
6295 return false;
6296 break;
6297
6298 case CONST_VECTOR:
6299 if (x == CONST0_RTX (GET_MODE (x)))
6300 return true;
6301 return false;
6302
6303 default:
6304 break;
6305 }
6306
6307 /* Otherwise we handle everything else in the move patterns. */
6308 return true;
6309 }
6310
6311 /* Determine if it's legal to put X into the constant pool. This
6312 is not possible for the address of thread-local symbols, which
6313 is checked above. */
6314
6315 static bool
6316 ix86_cannot_force_const_mem (rtx x)
6317 {
6318 /* We can always put integral constants and vectors in memory. */
6319 switch (GET_CODE (x))
6320 {
6321 case CONST_INT:
6322 case CONST_DOUBLE:
6323 case CONST_VECTOR:
6324 return false;
6325
6326 default:
6327 break;
6328 }
6329 return !legitimate_constant_p (x);
6330 }
6331
6332 /* Determine if a given RTX is a valid constant address. */
6333
6334 bool
6335 constant_address_p (rtx x)
6336 {
6337 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6338 }
6339
6340 /* Nonzero if the constant value X is a legitimate general operand
6341 when generating PIC code. It is given that flag_pic is on and
6342 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6343
6344 bool
6345 legitimate_pic_operand_p (rtx x)
6346 {
6347 rtx inner;
6348
6349 switch (GET_CODE (x))
6350 {
6351 case CONST:
6352 inner = XEXP (x, 0);
6353 if (GET_CODE (inner) == PLUS
6354 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6355 inner = XEXP (inner, 0);
6356
6357 /* Only some unspecs are valid as "constants". */
6358 if (GET_CODE (inner) == UNSPEC)
6359 switch (XINT (inner, 1))
6360 {
6361 case UNSPEC_GOTOFF:
6362 return TARGET_64BIT;
6363 case UNSPEC_TPOFF:
6364 x = XVECEXP (inner, 0, 0);
6365 return (GET_CODE (x) == SYMBOL_REF
6366 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6367 default:
6368 return false;
6369 }
6370 /* FALLTHRU */
6371
6372 case SYMBOL_REF:
6373 case LABEL_REF:
6374 return legitimate_pic_address_disp_p (x);
6375
6376 default:
6377 return true;
6378 }
6379 }
6380
6381 /* Determine if a given CONST RTX is a valid memory displacement
6382 in PIC mode. */
6383
6384 int
6385 legitimate_pic_address_disp_p (rtx disp)
6386 {
6387 bool saw_plus;
6388
6389 /* In 64bit mode we can allow direct addresses of symbols and labels
6390 when they are not dynamic symbols. */
6391 if (TARGET_64BIT)
6392 {
6393 rtx op0 = disp, op1;
6394
6395 switch (GET_CODE (disp))
6396 {
6397 case LABEL_REF:
6398 return true;
6399
6400 case CONST:
6401 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6402 break;
6403 op0 = XEXP (XEXP (disp, 0), 0);
6404 op1 = XEXP (XEXP (disp, 0), 1);
6405 if (GET_CODE (op1) != CONST_INT
6406 || INTVAL (op1) >= 16*1024*1024
6407 || INTVAL (op1) < -16*1024*1024)
6408 break;
6409 if (GET_CODE (op0) == LABEL_REF)
6410 return true;
6411 if (GET_CODE (op0) != SYMBOL_REF)
6412 break;
6413 /* FALLTHRU */
6414
6415 case SYMBOL_REF:
6416 /* TLS references should always be enclosed in UNSPEC. */
6417 if (SYMBOL_REF_TLS_MODEL (op0))
6418 return false;
6419 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6420 return true;
6421 break;
6422
6423 default:
6424 break;
6425 }
6426 }
6427 if (GET_CODE (disp) != CONST)
6428 return 0;
6429 disp = XEXP (disp, 0);
6430
6431 if (TARGET_64BIT)
6432 {
6433 /* It is not safe to allow PLUS expressions; this limits the allowed
6434 distance of GOT references. We should not need these anyway. */
6435 if (GET_CODE (disp) != UNSPEC
6436 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6437 && XINT (disp, 1) != UNSPEC_GOTOFF))
6438 return 0;
6439
6440 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6441 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6442 return 0;
6443 return 1;
6444 }
6445
6446 saw_plus = false;
6447 if (GET_CODE (disp) == PLUS)
6448 {
6449 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6450 return 0;
6451 disp = XEXP (disp, 0);
6452 saw_plus = true;
6453 }
6454
6455 if (TARGET_MACHO && darwin_local_data_pic (disp))
6456 return 1;
6457
6458 if (GET_CODE (disp) != UNSPEC)
6459 return 0;
6460
6461 switch (XINT (disp, 1))
6462 {
6463 case UNSPEC_GOT:
6464 if (saw_plus)
6465 return false;
6466 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6467 case UNSPEC_GOTOFF:
6468 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6469 While the ABI also specifies a 32bit relocation, we don't produce it in
6470 the small PIC model at all. */
6471 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6472 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6473 && !TARGET_64BIT)
6474 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6475 return false;
6476 case UNSPEC_GOTTPOFF:
6477 case UNSPEC_GOTNTPOFF:
6478 case UNSPEC_INDNTPOFF:
6479 if (saw_plus)
6480 return false;
6481 disp = XVECEXP (disp, 0, 0);
6482 return (GET_CODE (disp) == SYMBOL_REF
6483 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6484 case UNSPEC_NTPOFF:
6485 disp = XVECEXP (disp, 0, 0);
6486 return (GET_CODE (disp) == SYMBOL_REF
6487 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6488 case UNSPEC_DTPOFF:
6489 disp = XVECEXP (disp, 0, 0);
6490 return (GET_CODE (disp) == SYMBOL_REF
6491 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6492 }
6493
6494 return 0;
6495 }
6496
6497 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6498 memory address for an instruction. The MODE argument is the machine mode
6499 for the MEM expression that wants to use this address.
6500
6501 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6502 convert common non-canonical forms to canonical form so that they will
6503 be recognized. */
6504
6505 int
6506 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6507 {
6508 struct ix86_address parts;
6509 rtx base, index, disp;
6510 HOST_WIDE_INT scale;
6511 const char *reason = NULL;
6512 rtx reason_rtx = NULL_RTX;
6513
6514 if (TARGET_DEBUG_ADDR)
6515 {
6516 fprintf (stderr,
6517 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6518 GET_MODE_NAME (mode), strict);
6519 debug_rtx (addr);
6520 }
6521
6522 if (ix86_decompose_address (addr, &parts) <= 0)
6523 {
6524 reason = "decomposition failed";
6525 goto report_error;
6526 }
6527
6528 base = parts.base;
6529 index = parts.index;
6530 disp = parts.disp;
6531 scale = parts.scale;
6532
6533 /* Validate base register.
6534
6535 Don't allow SUBREG's that span more than a word here. It can lead to spill
6536 failures when the base is one word out of a two word structure, which is
6537 represented internally as a DImode int. */
6538
6539 if (base)
6540 {
6541 rtx reg;
6542 reason_rtx = base;
6543
6544 if (REG_P (base))
6545 reg = base;
6546 else if (GET_CODE (base) == SUBREG
6547 && REG_P (SUBREG_REG (base))
6548 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6549 <= UNITS_PER_WORD)
6550 reg = SUBREG_REG (base);
6551 else
6552 {
6553 reason = "base is not a register";
6554 goto report_error;
6555 }
6556
6557 if (GET_MODE (base) != Pmode)
6558 {
6559 reason = "base is not in Pmode";
6560 goto report_error;
6561 }
6562
6563 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6564 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6565 {
6566 reason = "base is not valid";
6567 goto report_error;
6568 }
6569 }
6570
6571 /* Validate index register.
6572
6573 Don't allow SUBREG's that span more than a word here -- same as above. */
6574
6575 if (index)
6576 {
6577 rtx reg;
6578 reason_rtx = index;
6579
6580 if (REG_P (index))
6581 reg = index;
6582 else if (GET_CODE (index) == SUBREG
6583 && REG_P (SUBREG_REG (index))
6584 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6585 <= UNITS_PER_WORD)
6586 reg = SUBREG_REG (index);
6587 else
6588 {
6589 reason = "index is not a register";
6590 goto report_error;
6591 }
6592
6593 if (GET_MODE (index) != Pmode)
6594 {
6595 reason = "index is not in Pmode";
6596 goto report_error;
6597 }
6598
6599 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6600 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6601 {
6602 reason = "index is not valid";
6603 goto report_error;
6604 }
6605 }
6606
6607 /* Validate scale factor. */
6608 if (scale != 1)
6609 {
6610 reason_rtx = GEN_INT (scale);
6611 if (!index)
6612 {
6613 reason = "scale without index";
6614 goto report_error;
6615 }
6616
6617 if (scale != 2 && scale != 4 && scale != 8)
6618 {
6619 reason = "scale is not a valid multiplier";
6620 goto report_error;
6621 }
6622 }
6623
6624 /* Validate displacement. */
6625 if (disp)
6626 {
6627 reason_rtx = disp;
6628
6629 if (GET_CODE (disp) == CONST
6630 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6631 switch (XINT (XEXP (disp, 0), 1))
6632 {
6633 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6634 used. While the ABI also specifies 32bit relocations, we don't produce
6635 them at all and use IP relative instead. */
6636 case UNSPEC_GOT:
6637 case UNSPEC_GOTOFF:
6638 gcc_assert (flag_pic);
6639 if (!TARGET_64BIT)
6640 goto is_legitimate_pic;
6641 reason = "64bit address unspec";
6642 goto report_error;
6643
6644 case UNSPEC_GOTPCREL:
6645 gcc_assert (flag_pic);
6646 goto is_legitimate_pic;
6647
6648 case UNSPEC_GOTTPOFF:
6649 case UNSPEC_GOTNTPOFF:
6650 case UNSPEC_INDNTPOFF:
6651 case UNSPEC_NTPOFF:
6652 case UNSPEC_DTPOFF:
6653 break;
6654
6655 default:
6656 reason = "invalid address unspec";
6657 goto report_error;
6658 }
6659
6660 else if (SYMBOLIC_CONST (disp)
6661 && (flag_pic
6662 || (TARGET_MACHO
6663 #if TARGET_MACHO
6664 && MACHOPIC_INDIRECT
6665 && !machopic_operand_p (disp)
6666 #endif
6667 )))
6668 {
6669
6670 is_legitimate_pic:
6671 if (TARGET_64BIT && (index || base))
6672 {
6673 /* foo@dtpoff(%rX) is ok. */
6674 if (GET_CODE (disp) != CONST
6675 || GET_CODE (XEXP (disp, 0)) != PLUS
6676 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6677 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6678 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6679 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6680 {
6681 reason = "non-constant pic memory reference";
6682 goto report_error;
6683 }
6684 }
6685 else if (! legitimate_pic_address_disp_p (disp))
6686 {
6687 reason = "displacement is an invalid pic construct";
6688 goto report_error;
6689 }
6690
6691 /* This code used to verify that a symbolic pic displacement
6692 includes the pic_offset_table_rtx register.
6693
6694 While this is a good idea, unfortunately these constructs may
6695 be created by the "adds using lea" optimization for incorrect
6696 code like:
6697
6698 int a;
6699 int foo(int i)
6700 {
6701 return *(&a+i);
6702 }
6703
6704 This code is nonsensical, but results in addressing the
6705 GOT table with pic_offset_table_rtx as the base. We can't
6706 just refuse it easily, since it gets matched by the
6707 "addsi3" pattern, which later gets split to lea when the
6708 output register differs from the input. While this
6709 could be handled by a separate addsi pattern for this case
6710 that never results in lea, disabling this test seems to be
6711 the easier and correct fix for the crash. */
6712 }
6713 else if (GET_CODE (disp) != LABEL_REF
6714 && GET_CODE (disp) != CONST_INT
6715 && (GET_CODE (disp) != CONST
6716 || !legitimate_constant_p (disp))
6717 && (GET_CODE (disp) != SYMBOL_REF
6718 || !legitimate_constant_p (disp)))
6719 {
6720 reason = "displacement is not constant";
6721 goto report_error;
6722 }
6723 else if (TARGET_64BIT
6724 && !x86_64_immediate_operand (disp, VOIDmode))
6725 {
6726 reason = "displacement is out of range";
6727 goto report_error;
6728 }
6729 }
6730
6731 /* Everything looks valid. */
6732 if (TARGET_DEBUG_ADDR)
6733 fprintf (stderr, "Success.\n");
6734 return TRUE;
6735
6736 report_error:
6737 if (TARGET_DEBUG_ADDR)
6738 {
6739 fprintf (stderr, "Error: %s\n", reason);
6740 debug_rtx (reason_rtx);
6741 }
6742 return FALSE;
6743 }
6744 \f
6745 /* Return a unique alias set for the GOT. */
6746
6747 static HOST_WIDE_INT
6748 ix86_GOT_alias_set (void)
6749 {
6750 static HOST_WIDE_INT set = -1;
6751 if (set == -1)
6752 set = new_alias_set ();
6753 return set;
6754 }
6755
6756 /* Return a legitimate reference for ORIG (an address) using the
6757 register REG. If REG is 0, a new pseudo is generated.
6758
6759 There are two types of references that must be handled:
6760
6761 1. Global data references must load the address from the GOT, via
6762 the PIC reg. An insn is emitted to do this load, and the reg is
6763 returned.
6764
6765 2. Static data references, constant pool addresses, and code labels
6766 compute the address as an offset from the GOT, whose base is in
6767 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6768 differentiate them from global data objects. The returned
6769 address is the PIC reg + an unspec constant.
6770
6771 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6772 reg also appears in the address. */
6773
6774 static rtx
6775 legitimize_pic_address (rtx orig, rtx reg)
6776 {
6777 rtx addr = orig;
6778 rtx new = orig;
6779 rtx base;
6780
6781 #if TARGET_MACHO
6782 if (TARGET_MACHO && !TARGET_64BIT)
6783 {
6784 if (reg == 0)
6785 reg = gen_reg_rtx (Pmode);
6786 /* Use the generic Mach-O PIC machinery. */
6787 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6788 }
6789 #endif
6790
6791 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6792 new = addr;
6793 else if (TARGET_64BIT
6794 && ix86_cmodel != CM_SMALL_PIC
6795 && local_symbolic_operand (addr, Pmode))
6796 {
6797 rtx tmpreg;
6798 /* This symbol may be referenced via a displacement from the PIC
6799 base address (@GOTOFF). */
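/* A descriptive note (assumption about the rationale): in the medium and
large PIC code models the @GOTOFF offset may not fit in a 32-bit address
displacement, so it is materialized in a register below and added to the
PIC base explicitly rather than folded into the addressing mode. */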
6800
6801 if (reload_in_progress)
6802 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6803 if (GET_CODE (addr) == CONST)
6804 addr = XEXP (addr, 0);
6805 if (GET_CODE (addr) == PLUS)
6806 {
6807 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6808 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6809 }
6810 else
6811 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6812 new = gen_rtx_CONST (Pmode, new);
6813 if (!reg)
6814 tmpreg = gen_reg_rtx (Pmode);
6815 else
6816 tmpreg = reg;
6817 emit_move_insn (tmpreg, new);
6818
6819 if (reg != 0)
6820 {
6821 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6822 tmpreg, 1, OPTAB_DIRECT);
6823 new = reg;
6824 }
6825 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6826 }
6827 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6828 {
6829 /* This symbol may be referenced via a displacement from the PIC
6830 base address (@GOTOFF). */
6831
6832 if (reload_in_progress)
6833 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6834 if (GET_CODE (addr) == CONST)
6835 addr = XEXP (addr, 0);
6836 if (GET_CODE (addr) == PLUS)
6837 {
6838 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6839 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6840 }
6841 else
6842 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6843 new = gen_rtx_CONST (Pmode, new);
6844 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6845
6846 if (reg != 0)
6847 {
6848 emit_move_insn (reg, new);
6849 new = reg;
6850 }
6851 }
6852 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6853 {
6854 if (TARGET_64BIT)
6855 {
6856 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6857 new = gen_rtx_CONST (Pmode, new);
6858 new = gen_const_mem (Pmode, new);
6859 set_mem_alias_set (new, ix86_GOT_alias_set ());
6860
6861 if (reg == 0)
6862 reg = gen_reg_rtx (Pmode);
6863 /* Use gen_movsi directly; otherwise the address is loaded
6864 into a register for CSE. We don't want to CSE these addresses;
6865 instead we CSE the addresses loaded from the GOT table, so skip this. */
6866 emit_insn (gen_movsi (reg, new));
6867 new = reg;
6868 }
6869 else
6870 {
6871 /* This symbol must be referenced via a load from the
6872 Global Offset Table (@GOT). */
6873
6874 if (reload_in_progress)
6875 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6876 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6877 new = gen_rtx_CONST (Pmode, new);
6878 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6879 new = gen_const_mem (Pmode, new);
6880 set_mem_alias_set (new, ix86_GOT_alias_set ());
6881
6882 if (reg == 0)
6883 reg = gen_reg_rtx (Pmode);
6884 emit_move_insn (reg, new);
6885 new = reg;
6886 }
6887 }
6888 else
6889 {
6890 if (GET_CODE (addr) == CONST_INT
6891 && !x86_64_immediate_operand (addr, VOIDmode))
6892 {
6893 if (reg)
6894 {
6895 emit_move_insn (reg, addr);
6896 new = reg;
6897 }
6898 else
6899 new = force_reg (Pmode, addr);
6900 }
6901 else if (GET_CODE (addr) == CONST)
6902 {
6903 addr = XEXP (addr, 0);
6904
6905 /* We must match stuff we generated before. Assume the only
6906 unspecs that can get here are ours. Not that we could do
6907 anything with them anyway.... */
6908 if (GET_CODE (addr) == UNSPEC
6909 || (GET_CODE (addr) == PLUS
6910 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6911 return orig;
6912 gcc_assert (GET_CODE (addr) == PLUS);
6913 }
6914 if (GET_CODE (addr) == PLUS)
6915 {
6916 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6917
6918 /* Check first to see if this is a constant offset from a @GOTOFF
6919 symbol reference. */
6920 if (local_symbolic_operand (op0, Pmode)
6921 && GET_CODE (op1) == CONST_INT)
6922 {
6923 if (!TARGET_64BIT)
6924 {
6925 if (reload_in_progress)
6926 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6927 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6928 UNSPEC_GOTOFF);
6929 new = gen_rtx_PLUS (Pmode, new, op1);
6930 new = gen_rtx_CONST (Pmode, new);
6931 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6932
6933 if (reg != 0)
6934 {
6935 emit_move_insn (reg, new);
6936 new = reg;
6937 }
6938 }
6939 else
6940 {
6941 if (INTVAL (op1) < -16*1024*1024
6942 || INTVAL (op1) >= 16*1024*1024)
6943 {
6944 if (!x86_64_immediate_operand (op1, Pmode))
6945 op1 = force_reg (Pmode, op1);
6946 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6947 }
6948 }
6949 }
6950 else
6951 {
6952 base = legitimize_pic_address (XEXP (addr, 0), reg);
6953 new = legitimize_pic_address (XEXP (addr, 1),
6954 base == reg ? NULL_RTX : reg);
6955
6956 if (GET_CODE (new) == CONST_INT)
6957 new = plus_constant (base, INTVAL (new));
6958 else
6959 {
6960 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6961 {
6962 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6963 new = XEXP (new, 1);
6964 }
6965 new = gen_rtx_PLUS (Pmode, base, new);
6966 }
6967 }
6968 }
6969 }
6970 return new;
6971 }
6972 \f
6973 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6974
6975 static rtx
6976 get_thread_pointer (int to_reg)
6977 {
6978 rtx tp, reg, insn;
6979
6980 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6981 if (!to_reg)
6982 return tp;
6983
6984 reg = gen_reg_rtx (Pmode);
6985 insn = gen_rtx_SET (VOIDmode, reg, tp);
6986 insn = emit_insn (insn);
6987
6988 return reg;
6989 }
6990
6991 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6992 false if we expect this to be used for a memory address and true if
6993 we expect to load the address into a register. */
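/* A rough sketch of the code generated for each model (a summary, not the
exact emitted sequences): global and local dynamic call __tls_get_addr
(or use TLS descriptors with -mtls-dialect=gnu2), initial exec loads the
thread-pointer offset from the GOT (@GOTTPOFF, @GOTNTPOFF or @INDNTPOFF)
and combines it with the thread pointer, and local exec combines a
link-time @TPOFF/@NTPOFF constant with the thread pointer. */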
6994
6995 static rtx
6996 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6997 {
6998 rtx dest, base, off, pic, tp;
6999 int type;
7000
7001 switch (model)
7002 {
7003 case TLS_MODEL_GLOBAL_DYNAMIC:
7004 dest = gen_reg_rtx (Pmode);
7005 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7006
7007 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7008 {
7009 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7010
7011 start_sequence ();
7012 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7013 insns = get_insns ();
7014 end_sequence ();
7015
7016 emit_libcall_block (insns, dest, rax, x);
7017 }
7018 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7019 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7020 else
7021 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7022
7023 if (TARGET_GNU2_TLS)
7024 {
7025 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7026
7027 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7028 }
7029 break;
7030
7031 case TLS_MODEL_LOCAL_DYNAMIC:
7032 base = gen_reg_rtx (Pmode);
7033 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7034
7035 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7036 {
7037 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7038
7039 start_sequence ();
7040 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7041 insns = get_insns ();
7042 end_sequence ();
7043
7044 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7045 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7046 emit_libcall_block (insns, base, rax, note);
7047 }
7048 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7049 emit_insn (gen_tls_local_dynamic_base_64 (base));
7050 else
7051 emit_insn (gen_tls_local_dynamic_base_32 (base));
7052
7053 if (TARGET_GNU2_TLS)
7054 {
7055 rtx x = ix86_tls_module_base ();
7056
7057 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7058 gen_rtx_MINUS (Pmode, x, tp));
7059 }
7060
7061 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7062 off = gen_rtx_CONST (Pmode, off);
7063
7064 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7065
7066 if (TARGET_GNU2_TLS)
7067 {
7068 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7069
7070 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7071 }
7072
7073 break;
7074
7075 case TLS_MODEL_INITIAL_EXEC:
7076 if (TARGET_64BIT)
7077 {
7078 pic = NULL;
7079 type = UNSPEC_GOTNTPOFF;
7080 }
7081 else if (flag_pic)
7082 {
7083 if (reload_in_progress)
7084 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7085 pic = pic_offset_table_rtx;
7086 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7087 }
7088 else if (!TARGET_ANY_GNU_TLS)
7089 {
7090 pic = gen_reg_rtx (Pmode);
7091 emit_insn (gen_set_got (pic));
7092 type = UNSPEC_GOTTPOFF;
7093 }
7094 else
7095 {
7096 pic = NULL;
7097 type = UNSPEC_INDNTPOFF;
7098 }
7099
7100 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7101 off = gen_rtx_CONST (Pmode, off);
7102 if (pic)
7103 off = gen_rtx_PLUS (Pmode, pic, off);
7104 off = gen_const_mem (Pmode, off);
7105 set_mem_alias_set (off, ix86_GOT_alias_set ());
7106
7107 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7108 {
7109 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7110 off = force_reg (Pmode, off);
7111 return gen_rtx_PLUS (Pmode, base, off);
7112 }
7113 else
7114 {
7115 base = get_thread_pointer (true);
7116 dest = gen_reg_rtx (Pmode);
7117 emit_insn (gen_subsi3 (dest, base, off));
7118 }
7119 break;
7120
7121 case TLS_MODEL_LOCAL_EXEC:
7122 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7123 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7124 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7125 off = gen_rtx_CONST (Pmode, off);
7126
7127 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7128 {
7129 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7130 return gen_rtx_PLUS (Pmode, base, off);
7131 }
7132 else
7133 {
7134 base = get_thread_pointer (true);
7135 dest = gen_reg_rtx (Pmode);
7136 emit_insn (gen_subsi3 (dest, base, off));
7137 }
7138 break;
7139
7140 default:
7141 gcc_unreachable ();
7142 }
7143
7144 return dest;
7145 }
7146
7147 /* Try machine-dependent ways of modifying an illegitimate address
7148 to be legitimate. If we find one, return the new, valid address.
7149 This macro is used in only one place: `memory_address' in explow.c.
7150
7151 OLDX is the address as it was before break_out_memory_refs was called.
7152 In some cases it is useful to look at this to decide what needs to be done.
7153
7154 MODE and WIN are passed so that this macro can use
7155 GO_IF_LEGITIMATE_ADDRESS.
7156
7157 It is always safe for this macro to do nothing. It exists to recognize
7158 opportunities to optimize the output.
7159
7160 For the 80386, we handle X+REG by loading X into a register R and
7161 using R+REG. R will go in a general reg and indexing will be used.
7162 However, if REG is a broken-out memory address or multiplication,
7163 nothing needs to be done because REG can certainly go in a general reg.
7164
7165 When -fpic is used, special handling is needed for symbolic references.
7166 See comments by legitimize_pic_address in i386.c for details. */
7167
7168 rtx
7169 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7170 {
7171 int changed = 0;
7172 unsigned log;
7173
7174 if (TARGET_DEBUG_ADDR)
7175 {
7176 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7177 GET_MODE_NAME (mode));
7178 debug_rtx (x);
7179 }
7180
7181 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7182 if (log)
7183 return legitimize_tls_address (x, log, false);
7184 if (GET_CODE (x) == CONST
7185 && GET_CODE (XEXP (x, 0)) == PLUS
7186 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7187 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7188 {
7189 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7190 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7191 }
7192
7193 if (flag_pic && SYMBOLIC_CONST (x))
7194 return legitimize_pic_address (x, 0);
7195
7196 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7197 if (GET_CODE (x) == ASHIFT
7198 && GET_CODE (XEXP (x, 1)) == CONST_INT
7199 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7200 {
7201 changed = 1;
7202 log = INTVAL (XEXP (x, 1));
7203 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7204 GEN_INT (1 << log));
7205 }
7206
7207 if (GET_CODE (x) == PLUS)
7208 {
7209 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7210
7211 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7212 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7213 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7214 {
7215 changed = 1;
7216 log = INTVAL (XEXP (XEXP (x, 0), 1));
7217 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7218 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7219 GEN_INT (1 << log));
7220 }
7221
7222 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7223 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7224 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7225 {
7226 changed = 1;
7227 log = INTVAL (XEXP (XEXP (x, 1), 1));
7228 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7229 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7230 GEN_INT (1 << log));
7231 }
7232
7233 /* Put multiply first if it isn't already. */
7234 if (GET_CODE (XEXP (x, 1)) == MULT)
7235 {
7236 rtx tmp = XEXP (x, 0);
7237 XEXP (x, 0) = XEXP (x, 1);
7238 XEXP (x, 1) = tmp;
7239 changed = 1;
7240 }
7241
7242 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7243 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7244 created by virtual register instantiation, register elimination, and
7245 similar optimizations. */
7246 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7247 {
7248 changed = 1;
7249 x = gen_rtx_PLUS (Pmode,
7250 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7251 XEXP (XEXP (x, 1), 0)),
7252 XEXP (XEXP (x, 1), 1));
7253 }
7254
7255 /* Canonicalize
7256 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7257 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7258 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7259 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7260 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7261 && CONSTANT_P (XEXP (x, 1)))
7262 {
7263 rtx constant;
7264 rtx other = NULL_RTX;
7265
7266 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7267 {
7268 constant = XEXP (x, 1);
7269 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7270 }
7271 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7272 {
7273 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7274 other = XEXP (x, 1);
7275 }
7276 else
7277 constant = 0;
7278
7279 if (constant)
7280 {
7281 changed = 1;
7282 x = gen_rtx_PLUS (Pmode,
7283 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7284 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7285 plus_constant (other, INTVAL (constant)));
7286 }
7287 }
7288
7289 if (changed && legitimate_address_p (mode, x, FALSE))
7290 return x;
7291
7292 if (GET_CODE (XEXP (x, 0)) == MULT)
7293 {
7294 changed = 1;
7295 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7296 }
7297
7298 if (GET_CODE (XEXP (x, 1)) == MULT)
7299 {
7300 changed = 1;
7301 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7302 }
7303
7304 if (changed
7305 && GET_CODE (XEXP (x, 1)) == REG
7306 && GET_CODE (XEXP (x, 0)) == REG)
7307 return x;
7308
7309 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7310 {
7311 changed = 1;
7312 x = legitimize_pic_address (x, 0);
7313 }
7314
7315 if (changed && legitimate_address_p (mode, x, FALSE))
7316 return x;
7317
7318 if (GET_CODE (XEXP (x, 0)) == REG)
7319 {
7320 rtx temp = gen_reg_rtx (Pmode);
7321 rtx val = force_operand (XEXP (x, 1), temp);
7322 if (val != temp)
7323 emit_move_insn (temp, val);
7324
7325 XEXP (x, 1) = temp;
7326 return x;
7327 }
7328
7329 else if (GET_CODE (XEXP (x, 1)) == REG)
7330 {
7331 rtx temp = gen_reg_rtx (Pmode);
7332 rtx val = force_operand (XEXP (x, 0), temp);
7333 if (val != temp)
7334 emit_move_insn (temp, val);
7335
7336 XEXP (x, 0) = temp;
7337 return x;
7338 }
7339 }
7340
7341 return x;
7342 }
7343 \f
7344 /* Print an integer constant expression in assembler syntax. Addition
7345 and subtraction are the only arithmetic that may appear in these
7346 expressions. FILE is the stdio stream to write to, X is the rtx, and
7347 CODE is the operand print code from the output string. */
7348
7349 static void
7350 output_pic_addr_const (FILE *file, rtx x, int code)
7351 {
7352 char buf[256];
7353
7354 switch (GET_CODE (x))
7355 {
7356 case PC:
7357 gcc_assert (flag_pic);
7358 putc ('.', file);
7359 break;
7360
7361 case SYMBOL_REF:
7362 output_addr_const (file, x);
7363 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7364 fputs ("@PLT", file);
7365 break;
7366
7367 case LABEL_REF:
7368 x = XEXP (x, 0);
7369 /* FALLTHRU */
7370 case CODE_LABEL:
7371 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7372 assemble_name (asm_out_file, buf);
7373 break;
7374
7375 case CONST_INT:
7376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7377 break;
7378
7379 case CONST:
7380 /* This used to output parentheses around the expression,
7381 but that does not work on the 386 (either ATT or BSD assembler). */
7382 output_pic_addr_const (file, XEXP (x, 0), code);
7383 break;
7384
7385 case CONST_DOUBLE:
7386 if (GET_MODE (x) == VOIDmode)
7387 {
7388 /* We can use %d if the number is <32 bits and positive. */
7389 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7390 fprintf (file, "0x%lx%08lx",
7391 (unsigned long) CONST_DOUBLE_HIGH (x),
7392 (unsigned long) CONST_DOUBLE_LOW (x));
7393 else
7394 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7395 }
7396 else
7397 /* We can't handle floating point constants;
7398 PRINT_OPERAND must handle them. */
7399 output_operand_lossage ("floating constant misused");
7400 break;
7401
7402 case PLUS:
7403 /* Some assemblers need integer constants to appear first. */
7404 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7405 {
7406 output_pic_addr_const (file, XEXP (x, 0), code);
7407 putc ('+', file);
7408 output_pic_addr_const (file, XEXP (x, 1), code);
7409 }
7410 else
7411 {
7412 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7413 output_pic_addr_const (file, XEXP (x, 1), code);
7414 putc ('+', file);
7415 output_pic_addr_const (file, XEXP (x, 0), code);
7416 }
7417 break;
7418
7419 case MINUS:
7420 if (!TARGET_MACHO)
7421 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7422 output_pic_addr_const (file, XEXP (x, 0), code);
7423 putc ('-', file);
7424 output_pic_addr_const (file, XEXP (x, 1), code);
7425 if (!TARGET_MACHO)
7426 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7427 break;
7428
7429 case UNSPEC:
7430 gcc_assert (XVECLEN (x, 0) == 1);
7431 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7432 switch (XINT (x, 1))
7433 {
7434 case UNSPEC_GOT:
7435 fputs ("@GOT", file);
7436 break;
7437 case UNSPEC_GOTOFF:
7438 fputs ("@GOTOFF", file);
7439 break;
7440 case UNSPEC_GOTPCREL:
7441 fputs ("@GOTPCREL(%rip)", file);
7442 break;
7443 case UNSPEC_GOTTPOFF:
7444 /* FIXME: This might be @TPOFF in Sun ld too. */
7445 fputs ("@GOTTPOFF", file);
7446 break;
7447 case UNSPEC_TPOFF:
7448 fputs ("@TPOFF", file);
7449 break;
7450 case UNSPEC_NTPOFF:
7451 if (TARGET_64BIT)
7452 fputs ("@TPOFF", file);
7453 else
7454 fputs ("@NTPOFF", file);
7455 break;
7456 case UNSPEC_DTPOFF:
7457 fputs ("@DTPOFF", file);
7458 break;
7459 case UNSPEC_GOTNTPOFF:
7460 if (TARGET_64BIT)
7461 fputs ("@GOTTPOFF(%rip)", file);
7462 else
7463 fputs ("@GOTNTPOFF", file);
7464 break;
7465 case UNSPEC_INDNTPOFF:
7466 fputs ("@INDNTPOFF", file);
7467 break;
7468 default:
7469 output_operand_lossage ("invalid UNSPEC as operand");
7470 break;
7471 }
7472 break;
7473
7474 default:
7475 output_operand_lossage ("invalid expression as operand");
7476 }
7477 }
7478
7479 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7480 We need to emit DTP-relative relocations. */
7481
7482 static void
7483 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7484 {
7485 fputs (ASM_LONG, file);
7486 output_addr_const (file, x);
7487 fputs ("@DTPOFF", file);
7488 switch (size)
7489 {
7490 case 4:
7491 break;
7492 case 8:
7493 fputs (", 0", file);
7494 break;
7495 default:
7496 gcc_unreachable ();
7497 }
7498 }
7499
7500 /* In the name of slightly smaller debug output, and to cater to
7501 general assembler lossage, recognize PIC+GOTOFF and turn it back
7502 into a direct symbol reference.
7503
7504 On Darwin, this is necessary to avoid a crash, because Darwin
7505 has a different PIC label for each routine but the DWARF debugging
7506 information is not associated with any particular routine, so it's
7507 necessary to remove references to the PIC label from RTL stored by
7508 the DWARF output code. */
7509
7510 static rtx
7511 ix86_delegitimize_address (rtx orig_x)
7512 {
7513 rtx x = orig_x;
7514 /* reg_addend is NULL or a multiple of some register. */
7515 rtx reg_addend = NULL_RTX;
7516 /* const_addend is NULL or a const_int. */
7517 rtx const_addend = NULL_RTX;
7518 /* This is the result, or NULL. */
7519 rtx result = NULL_RTX;
7520
7521 if (GET_CODE (x) == MEM)
7522 x = XEXP (x, 0);
7523
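/* On x86-64 a delegitimizable reference is simply a GOTPCREL load:
(mem (const (unspec [symbol] UNSPEC_GOTPCREL))); strip it down to the
bare symbol. */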
7524 if (TARGET_64BIT)
7525 {
7526 if (GET_CODE (x) != CONST
7527 || GET_CODE (XEXP (x, 0)) != UNSPEC
7528 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7529 || GET_CODE (orig_x) != MEM)
7530 return orig_x;
7531 return XVECEXP (XEXP (x, 0), 0, 0);
7532 }
7533
7534 if (GET_CODE (x) != PLUS
7535 || GET_CODE (XEXP (x, 1)) != CONST)
7536 return orig_x;
7537
7538 if (GET_CODE (XEXP (x, 0)) == REG
7539 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7540 /* %ebx + GOT/GOTOFF */
7541 ;
7542 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7543 {
7544 /* %ebx + %reg * scale + GOT/GOTOFF */
7545 reg_addend = XEXP (x, 0);
7546 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7547 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7548 reg_addend = XEXP (reg_addend, 1);
7549 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7550 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7551 reg_addend = XEXP (reg_addend, 0);
7552 else
7553 return orig_x;
7554 if (GET_CODE (reg_addend) != REG
7555 && GET_CODE (reg_addend) != MULT
7556 && GET_CODE (reg_addend) != ASHIFT)
7557 return orig_x;
7558 }
7559 else
7560 return orig_x;
7561
7562 x = XEXP (XEXP (x, 1), 0);
7563 if (GET_CODE (x) == PLUS
7564 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7565 {
7566 const_addend = XEXP (x, 1);
7567 x = XEXP (x, 0);
7568 }
7569
7570 if (GET_CODE (x) == UNSPEC
7571 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7572 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7573 result = XVECEXP (x, 0, 0);
7574
7575 if (TARGET_MACHO && darwin_local_data_pic (x)
7576 && GET_CODE (orig_x) != MEM)
7577 result = XEXP (x, 0);
7578
7579 if (! result)
7580 return orig_x;
7581
7582 if (const_addend)
7583 result = gen_rtx_PLUS (Pmode, result, const_addend);
7584 if (reg_addend)
7585 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7586 return result;
7587 }
7588 \f
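/* Print to FILE the one- or two-letter condition suffix ("e", "ne", "g",
"b", ...) for comparison CODE in mode MODE. If REVERSE is nonzero, the
condition is reversed first. FP selects the spellings some assemblers
prefer for fcmov ("nbe", "nb", "u", "nu" instead of "a", "ae", "p", "np"). */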
7589 static void
7590 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7591 int fp, FILE *file)
7592 {
7593 const char *suffix;
7594
7595 if (mode == CCFPmode || mode == CCFPUmode)
7596 {
7597 enum rtx_code second_code, bypass_code;
7598 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7599 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7600 code = ix86_fp_compare_code_to_integer (code);
7601 mode = CCmode;
7602 }
7603 if (reverse)
7604 code = reverse_condition (code);
7605
7606 switch (code)
7607 {
7608 case EQ:
7609 suffix = "e";
7610 break;
7611 case NE:
7612 suffix = "ne";
7613 break;
7614 case GT:
7615 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7616 suffix = "g";
7617 break;
7618 case GTU:
7619 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7620 Those same assemblers have the same but opposite lossage on cmov. */
7621 gcc_assert (mode == CCmode);
7622 suffix = fp ? "nbe" : "a";
7623 break;
7624 case LT:
7625 switch (mode)
7626 {
7627 case CCNOmode:
7628 case CCGOCmode:
7629 suffix = "s";
7630 break;
7631
7632 case CCmode:
7633 case CCGCmode:
7634 suffix = "l";
7635 break;
7636
7637 default:
7638 gcc_unreachable ();
7639 }
7640 break;
7641 case LTU:
7642 gcc_assert (mode == CCmode);
7643 suffix = "b";
7644 break;
7645 case GE:
7646 switch (mode)
7647 {
7648 case CCNOmode:
7649 case CCGOCmode:
7650 suffix = "ns";
7651 break;
7652
7653 case CCmode:
7654 case CCGCmode:
7655 suffix = "ge";
7656 break;
7657
7658 default:
7659 gcc_unreachable ();
7660 }
7661 break;
7662 case GEU:
7663 /* ??? As above. */
7664 gcc_assert (mode == CCmode);
7665 suffix = fp ? "nb" : "ae";
7666 break;
7667 case LE:
7668 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7669 suffix = "le";
7670 break;
7671 case LEU:
7672 gcc_assert (mode == CCmode);
7673 suffix = "be";
7674 break;
7675 case UNORDERED:
7676 suffix = fp ? "u" : "p";
7677 break;
7678 case ORDERED:
7679 suffix = fp ? "nu" : "np";
7680 break;
7681 default:
7682 gcc_unreachable ();
7683 }
7684 fputs (suffix, file);
7685 }
7686
7687 /* Print the name of register X to FILE based on its machine mode and number.
7688 If CODE is 'w', pretend the mode is HImode.
7689 If CODE is 'b', pretend the mode is QImode.
7690 If CODE is 'k', pretend the mode is SImode.
7691 If CODE is 'q', pretend the mode is DImode.
7692 If CODE is 'h', pretend the reg is the 'high' byte register.
7693 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7694
7695 void
7696 print_reg (rtx x, int code, FILE *file)
7697 {
7698 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7699 && REGNO (x) != FRAME_POINTER_REGNUM
7700 && REGNO (x) != FLAGS_REG
7701 && REGNO (x) != FPSR_REG
7702 && REGNO (x) != FPCR_REG);
7703
7704 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7705 putc ('%', file);
7706
7707 if (code == 'w' || MMX_REG_P (x))
7708 code = 2;
7709 else if (code == 'b')
7710 code = 1;
7711 else if (code == 'k')
7712 code = 4;
7713 else if (code == 'q')
7714 code = 8;
7715 else if (code == 'y')
7716 code = 3;
7717 else if (code == 'h')
7718 code = 0;
7719 else
7720 code = GET_MODE_SIZE (GET_MODE (x));
7721
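/* From here on CODE is an operand size in bytes (1, 2, 4, 8, 12 or 16),
except that 0 stands for a high QImode byte register and 3 for an x87
stack register to be printed in the st(N) form. */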
7722 /* Irritatingly, the AMD extended registers use a different naming
7723 convention from the normal registers. */
7724 if (REX_INT_REG_P (x))
7725 {
7726 gcc_assert (TARGET_64BIT);
7727 switch (code)
7728 {
7729 case 0:
7730 error ("extended registers have no high halves");
7731 break;
7732 case 1:
7733 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7734 break;
7735 case 2:
7736 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7737 break;
7738 case 4:
7739 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7740 break;
7741 case 8:
7742 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7743 break;
7744 default:
7745 error ("unsupported operand size for extended register");
7746 break;
7747 }
7748 return;
7749 }
7750 switch (code)
7751 {
7752 case 3:
7753 if (STACK_TOP_P (x))
7754 {
7755 fputs ("st(0)", file);
7756 break;
7757 }
7758 /* FALLTHRU */
7759 case 8:
7760 case 4:
7761 case 12:
7762 if (! ANY_FP_REG_P (x))
7763 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7764 /* FALLTHRU */
7765 case 16:
7766 case 2:
7767 normal:
7768 fputs (hi_reg_name[REGNO (x)], file);
7769 break;
7770 case 1:
7771 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7772 goto normal;
7773 fputs (qi_reg_name[REGNO (x)], file);
7774 break;
7775 case 0:
7776 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7777 goto normal;
7778 fputs (qi_high_reg_name[REGNO (x)], file);
7779 break;
7780 default:
7781 gcc_unreachable ();
7782 }
7783 }
7784
7785 /* Locate some local-dynamic symbol still in use by this function
7786 so that we can print its name in some tls_local_dynamic_base
7787 pattern. */
7788
7789 static const char *
7790 get_some_local_dynamic_name (void)
7791 {
7792 rtx insn;
7793
7794 if (cfun->machine->some_ld_name)
7795 return cfun->machine->some_ld_name;
7796
7797 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7798 if (INSN_P (insn)
7799 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7800 return cfun->machine->some_ld_name;
7801
7802 gcc_unreachable ();
7803 }
7804
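/* Callback for for_each_rtx, used by get_some_local_dynamic_name above:
if *PX is a SYMBOL_REF with the local-dynamic TLS model, record its name
in cfun->machine->some_ld_name and stop the traversal. */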
7805 static int
7806 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7807 {
7808 rtx x = *px;
7809
7810 if (GET_CODE (x) == SYMBOL_REF
7811 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7812 {
7813 cfun->machine->some_ld_name = XSTR (x, 0);
7814 return 1;
7815 }
7816
7817 return 0;
7818 }
7819
7820 /* Meaning of CODE:
7821 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7822 C -- print opcode suffix for set/cmov insn.
7823 c -- like C, but print reversed condition
7824 F,f -- likewise, but for floating-point.
7825 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7826 otherwise nothing
7827 R -- print the prefix for register names.
7828 z -- print the opcode suffix for the size of the current operand.
7829 * -- print a star (in certain assembler syntax)
7830 A -- print an absolute memory reference.
7831 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7832 s -- print a shift double count, followed by the assembler's argument
7833 delimiter.
7834 b -- print the QImode name of the register for the indicated operand.
7835 %b0 would print %al if operands[0] is reg 0.
7836 w -- likewise, print the HImode name of the register.
7837 k -- likewise, print the SImode name of the register.
7838 q -- likewise, print the DImode name of the register.
7839 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7840 y -- print "st(0)" instead of "st" as a register.
7841 D -- print condition for SSE cmp instruction.
7842 P -- if PIC, print an @PLT suffix.
7843 X -- don't print any sort of PIC '@' suffix for a symbol.
7844 & -- print some in-use local-dynamic symbol name.
7845 H -- print a memory address offset by 8; used for sse high-parts
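+ -- print a branch hint prefix ("ds" for a predicted-taken branch,
"cs" for a predicted-not-taken branch) when branch prediction hints
are enabled and profitable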
7846 */
7847
7848 void
7849 print_operand (FILE *file, rtx x, int code)
7850 {
7851 if (code)
7852 {
7853 switch (code)
7854 {
7855 case '*':
7856 if (ASSEMBLER_DIALECT == ASM_ATT)
7857 putc ('*', file);
7858 return;
7859
7860 case '&':
7861 assemble_name (file, get_some_local_dynamic_name ());
7862 return;
7863
7864 case 'A':
7865 switch (ASSEMBLER_DIALECT)
7866 {
7867 case ASM_ATT:
7868 putc ('*', file);
7869 break;
7870
7871 case ASM_INTEL:
7872 /* Intel syntax. For absolute addresses, registers should not
7873 be surrounded by brackets. */
7874 if (GET_CODE (x) != REG)
7875 {
7876 putc ('[', file);
7877 PRINT_OPERAND (file, x, 0);
7878 putc (']', file);
7879 return;
7880 }
7881 break;
7882
7883 default:
7884 gcc_unreachable ();
7885 }
7886
7887 PRINT_OPERAND (file, x, 0);
7888 return;
7889
7890
7891 case 'L':
7892 if (ASSEMBLER_DIALECT == ASM_ATT)
7893 putc ('l', file);
7894 return;
7895
7896 case 'W':
7897 if (ASSEMBLER_DIALECT == ASM_ATT)
7898 putc ('w', file);
7899 return;
7900
7901 case 'B':
7902 if (ASSEMBLER_DIALECT == ASM_ATT)
7903 putc ('b', file);
7904 return;
7905
7906 case 'Q':
7907 if (ASSEMBLER_DIALECT == ASM_ATT)
7908 putc ('l', file);
7909 return;
7910
7911 case 'S':
7912 if (ASSEMBLER_DIALECT == ASM_ATT)
7913 putc ('s', file);
7914 return;
7915
7916 case 'T':
7917 if (ASSEMBLER_DIALECT == ASM_ATT)
7918 putc ('t', file);
7919 return;
7920
7921 case 'z':
7922 /* 387 opcodes don't get size suffixes if the operands are
7923 registers. */
7924 if (STACK_REG_P (x))
7925 return;
7926
7927 /* Likewise if using Intel opcodes. */
7928 if (ASSEMBLER_DIALECT == ASM_INTEL)
7929 return;
7930
7931 /* This is the size of op from size of operand. */
7932 switch (GET_MODE_SIZE (GET_MODE (x)))
7933 {
7934 case 2:
7935 #ifdef HAVE_GAS_FILDS_FISTS
7936 putc ('s', file);
7937 #endif
7938 return;
7939
7940 case 4:
7941 if (GET_MODE (x) == SFmode)
7942 {
7943 putc ('s', file);
7944 return;
7945 }
7946 else
7947 putc ('l', file);
7948 return;
7949
7950 case 12:
7951 case 16:
7952 putc ('t', file);
7953 return;
7954
7955 case 8:
7956 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7957 {
7958 #ifdef GAS_MNEMONICS
7959 putc ('q', file);
7960 #else
7961 putc ('l', file);
7962 putc ('l', file);
7963 #endif
7964 }
7965 else
7966 putc ('l', file);
7967 return;
7968
7969 default:
7970 gcc_unreachable ();
7971 }
7972
7973 case 'b':
7974 case 'w':
7975 case 'k':
7976 case 'q':
7977 case 'h':
7978 case 'y':
7979 case 'X':
7980 case 'P':
7981 break;
7982
7983 case 's':
7984 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7985 {
7986 PRINT_OPERAND (file, x, 0);
7987 putc (',', file);
7988 }
7989 return;
7990
7991 case 'D':
7992 /* A little bit of brain damage here. The SSE compare instructions
7993 use completely different names for the comparisons than the
7994 fp conditional moves do. */
7995 switch (GET_CODE (x))
7996 {
7997 case EQ:
7998 case UNEQ:
7999 fputs ("eq", file);
8000 break;
8001 case LT:
8002 case UNLT:
8003 fputs ("lt", file);
8004 break;
8005 case LE:
8006 case UNLE:
8007 fputs ("le", file);
8008 break;
8009 case UNORDERED:
8010 fputs ("unord", file);
8011 break;
8012 case NE:
8013 case LTGT:
8014 fputs ("neq", file);
8015 break;
8016 case UNGE:
8017 case GE:
8018 fputs ("nlt", file);
8019 break;
8020 case UNGT:
8021 case GT:
8022 fputs ("nle", file);
8023 break;
8024 case ORDERED:
8025 fputs ("ord", file);
8026 break;
8027 default:
8028 gcc_unreachable ();
8029 }
8030 return;
8031 case 'O':
8032 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8033 if (ASSEMBLER_DIALECT == ASM_ATT)
8034 {
8035 switch (GET_MODE (x))
8036 {
8037 case HImode: putc ('w', file); break;
8038 case SImode:
8039 case SFmode: putc ('l', file); break;
8040 case DImode:
8041 case DFmode: putc ('q', file); break;
8042 default: gcc_unreachable ();
8043 }
8044 putc ('.', file);
8045 }
8046 #endif
8047 return;
8048 case 'C':
8049 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8050 return;
8051 case 'F':
8052 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8053 if (ASSEMBLER_DIALECT == ASM_ATT)
8054 putc ('.', file);
8055 #endif
8056 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8057 return;
8058
8059 /* Like above, but reverse condition */
8060 case 'c':
8061 /* Check to see if argument to %c is really a constant
8062 and not a condition code which needs to be reversed. */
8063 if (!COMPARISON_P (x))
8064 {
8065 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8066 return;
8067 }
8068 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8069 return;
8070 case 'f':
8071 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8072 if (ASSEMBLER_DIALECT == ASM_ATT)
8073 putc ('.', file);
8074 #endif
8075 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8076 return;
8077
8078 case 'H':
8079 /* It doesn't actually matter what mode we use here, as we're
8080 only going to use this for printing. */
8081 x = adjust_address_nv (x, DImode, 8);
8082 break;
8083
8084 case '+':
8085 {
8086 rtx x;
8087
8088 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8089 return;
8090
8091 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8092 if (x)
8093 {
8094 int pred_val = INTVAL (XEXP (x, 0));
8095
8096 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8097 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8098 {
8099 int taken = pred_val > REG_BR_PROB_BASE / 2;
8100 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8101
8102 /* Emit hints only in the case the default branch prediction
8103 heuristics would fail. */
8104 if (taken != cputaken)
8105 {
8106 /* We use 3e (DS) prefix for taken branches and
8107 2e (CS) prefix for not taken branches. */
8108 if (taken)
8109 fputs ("ds ; ", file);
8110 else
8111 fputs ("cs ; ", file);
8112 }
8113 }
8114 }
8115 return;
8116 }
8117 default:
8118 output_operand_lossage ("invalid operand code '%c'", code);
8119 }
8120 }
8121
8122 if (GET_CODE (x) == REG)
8123 print_reg (x, code, file);
8124
8125 else if (GET_CODE (x) == MEM)
8126 {
8127 /* No `byte ptr' prefix for call instructions. */
8128 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8129 {
8130 const char * size;
8131 switch (GET_MODE_SIZE (GET_MODE (x)))
8132 {
8133 case 1: size = "BYTE"; break;
8134 case 2: size = "WORD"; break;
8135 case 4: size = "DWORD"; break;
8136 case 8: size = "QWORD"; break;
8137 case 12: size = "XWORD"; break;
8138 case 16: size = "XMMWORD"; break;
8139 default:
8140 gcc_unreachable ();
8141 }
8142
8143 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8144 if (code == 'b')
8145 size = "BYTE";
8146 else if (code == 'w')
8147 size = "WORD";
8148 else if (code == 'k')
8149 size = "DWORD";
8150
8151 fputs (size, file);
8152 fputs (" PTR ", file);
8153 }
8154
8155 x = XEXP (x, 0);
8156 /* Avoid (%rip) for call operands. */
8157 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8158 && GET_CODE (x) != CONST_INT)
8159 output_addr_const (file, x);
8160 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8161 output_operand_lossage ("invalid constraints for operand");
8162 else
8163 output_address (x);
8164 }
8165
8166 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8167 {
8168 REAL_VALUE_TYPE r;
8169 long l;
8170
8171 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8172 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8173
8174 if (ASSEMBLER_DIALECT == ASM_ATT)
8175 putc ('$', file);
8176 fprintf (file, "0x%08lx", l);
8177 }
8178
8179 /* These float cases don't actually occur as immediate operands. */
8180 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8181 {
8182 char dstr[30];
8183
8184 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8185 fprintf (file, "%s", dstr);
8186 }
8187
8188 else if (GET_CODE (x) == CONST_DOUBLE
8189 && GET_MODE (x) == XFmode)
8190 {
8191 char dstr[30];
8192
8193 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8194 fprintf (file, "%s", dstr);
8195 }
8196
8197 else
8198 {
8199 /* We have patterns that allow zero sets of memory, for instance.
8200 In 64-bit mode, we should probably support all 8-byte vectors,
8201 since we can in fact encode that into an immediate. */
8202 if (GET_CODE (x) == CONST_VECTOR)
8203 {
8204 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8205 x = const0_rtx;
8206 }
8207
8208 if (code != 'P')
8209 {
8210 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8211 {
8212 if (ASSEMBLER_DIALECT == ASM_ATT)
8213 putc ('$', file);
8214 }
8215 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8216 || GET_CODE (x) == LABEL_REF)
8217 {
8218 if (ASSEMBLER_DIALECT == ASM_ATT)
8219 putc ('$', file);
8220 else
8221 fputs ("OFFSET FLAT:", file);
8222 }
8223 }
8224 if (GET_CODE (x) == CONST_INT)
8225 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8226 else if (flag_pic)
8227 output_pic_addr_const (file, x, code);
8228 else
8229 output_addr_const (file, x);
8230 }
8231 }
8232 \f
8233 /* Print a memory operand whose address is ADDR. */
8234
8235 void
8236 print_operand_address (FILE *file, rtx addr)
8237 {
8238 struct ix86_address parts;
8239 rtx base, index, disp;
8240 int scale;
8241 int ok = ix86_decompose_address (addr, &parts);
8242
8243 gcc_assert (ok);
8244
8245 base = parts.base;
8246 index = parts.index;
8247 disp = parts.disp;
8248 scale = parts.scale;
8249
8250 switch (parts.seg)
8251 {
8252 case SEG_DEFAULT:
8253 break;
8254 case SEG_FS:
8255 case SEG_GS:
8256 if (USER_LABEL_PREFIX[0] == 0)
8257 putc ('%', file);
8258 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8259 break;
8260 default:
8261 gcc_unreachable ();
8262 }
8263
8264 if (!base && !index)
8265 {
8266 /* A displacement-only address requires special attention. */
8267
8268 if (GET_CODE (disp) == CONST_INT)
8269 {
8270 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8271 {
8272 if (USER_LABEL_PREFIX[0] == 0)
8273 putc ('%', file);
8274 fputs ("ds:", file);
8275 }
8276 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8277 }
8278 else if (flag_pic)
8279 output_pic_addr_const (file, disp, 0);
8280 else
8281 output_addr_const (file, disp);
8282
8283 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8284 if (TARGET_64BIT)
8285 {
8286 if (GET_CODE (disp) == CONST
8287 && GET_CODE (XEXP (disp, 0)) == PLUS
8288 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8289 disp = XEXP (XEXP (disp, 0), 0);
8290 if (GET_CODE (disp) == LABEL_REF
8291 || (GET_CODE (disp) == SYMBOL_REF
8292 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8293 fputs ("(%rip)", file);
8294 }
8295 }
8296 else
8297 {
8298 if (ASSEMBLER_DIALECT == ASM_ATT)
8299 {
8300 if (disp)
8301 {
8302 if (flag_pic)
8303 output_pic_addr_const (file, disp, 0);
8304 else if (GET_CODE (disp) == LABEL_REF)
8305 output_asm_label (disp);
8306 else
8307 output_addr_const (file, disp);
8308 }
8309
8310 putc ('(', file);
8311 if (base)
8312 print_reg (base, 0, file);
8313 if (index)
8314 {
8315 putc (',', file);
8316 print_reg (index, 0, file);
8317 if (scale != 1)
8318 fprintf (file, ",%d", scale);
8319 }
8320 putc (')', file);
8321 }
8322 else
8323 {
8324 rtx offset = NULL_RTX;
8325
8326 if (disp)
8327 {
8328 /* Pull out the offset of a symbol; print any symbol itself. */
8329 if (GET_CODE (disp) == CONST
8330 && GET_CODE (XEXP (disp, 0)) == PLUS
8331 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8332 {
8333 offset = XEXP (XEXP (disp, 0), 1);
8334 disp = gen_rtx_CONST (VOIDmode,
8335 XEXP (XEXP (disp, 0), 0));
8336 }
8337
8338 if (flag_pic)
8339 output_pic_addr_const (file, disp, 0);
8340 else if (GET_CODE (disp) == LABEL_REF)
8341 output_asm_label (disp);
8342 else if (GET_CODE (disp) == CONST_INT)
8343 offset = disp;
8344 else
8345 output_addr_const (file, disp);
8346 }
8347
8348 putc ('[', file);
8349 if (base)
8350 {
8351 print_reg (base, 0, file);
8352 if (offset)
8353 {
8354 if (INTVAL (offset) >= 0)
8355 putc ('+', file);
8356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8357 }
8358 }
8359 else if (offset)
8360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8361 else
8362 putc ('0', file);
8363
8364 if (index)
8365 {
8366 putc ('+', file);
8367 print_reg (index, 0, file);
8368 if (scale != 1)
8369 fprintf (file, "*%d", scale);
8370 }
8371 putc (']', file);
8372 }
8373 }
8374 }
8375
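/* Output the TLS-related UNSPEC wrappers (@GOTTPOFF, @TPOFF, @NTPOFF,
@DTPOFF, @GOTNTPOFF, @INDNTPOFF) that the generic output_addr_const
machinery cannot handle itself (used via OUTPUT_ADDR_CONST_EXTRA).
Return true if X was recognized. */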
8376 bool
8377 output_addr_const_extra (FILE *file, rtx x)
8378 {
8379 rtx op;
8380
8381 if (GET_CODE (x) != UNSPEC)
8382 return false;
8383
8384 op = XVECEXP (x, 0, 0);
8385 switch (XINT (x, 1))
8386 {
8387 case UNSPEC_GOTTPOFF:
8388 output_addr_const (file, op);
8389 /* FIXME: This might be @TPOFF in Sun ld. */
8390 fputs ("@GOTTPOFF", file);
8391 break;
8392 case UNSPEC_TPOFF:
8393 output_addr_const (file, op);
8394 fputs ("@TPOFF", file);
8395 break;
8396 case UNSPEC_NTPOFF:
8397 output_addr_const (file, op);
8398 if (TARGET_64BIT)
8399 fputs ("@TPOFF", file);
8400 else
8401 fputs ("@NTPOFF", file);
8402 break;
8403 case UNSPEC_DTPOFF:
8404 output_addr_const (file, op);
8405 fputs ("@DTPOFF", file);
8406 break;
8407 case UNSPEC_GOTNTPOFF:
8408 output_addr_const (file, op);
8409 if (TARGET_64BIT)
8410 fputs ("@GOTTPOFF(%rip)", file);
8411 else
8412 fputs ("@GOTNTPOFF", file);
8413 break;
8414 case UNSPEC_INDNTPOFF:
8415 output_addr_const (file, op);
8416 fputs ("@INDNTPOFF", file);
8417 break;
8418
8419 default:
8420 return false;
8421 }
8422
8423 return true;
8424 }
8425 \f
8426 /* Split one or more DImode RTL references into pairs of SImode
8427 references. The RTL can be REG, offsettable MEM, integer constant, or
8428 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8429 split and "num" is its length. lo_half and hi_half are output arrays
8430 that parallel "operands". */
8431
8432 void
8433 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8434 {
8435 while (num--)
8436 {
8437 rtx op = operands[num];
8438
8439 /* simplify_subreg refuses to split volatile memory references,
8440 but we still have to handle them. */
8441 if (GET_CODE (op) == MEM)
8442 {
8443 lo_half[num] = adjust_address (op, SImode, 0);
8444 hi_half[num] = adjust_address (op, SImode, 4);
8445 }
8446 else
8447 {
8448 lo_half[num] = simplify_gen_subreg (SImode, op,
8449 GET_MODE (op) == VOIDmode
8450 ? DImode : GET_MODE (op), 0);
8451 hi_half[num] = simplify_gen_subreg (SImode, op,
8452 GET_MODE (op) == VOIDmode
8453 ? DImode : GET_MODE (op), 4);
8454 }
8455 }
8456 }
8457 /* Split one or more TImode RTL references into pairs of DImode
8458 references. The RTL can be REG, offsettable MEM, integer constant, or
8459 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8460 split and "num" is its length. lo_half and hi_half are output arrays
8461 that parallel "operands". */
8462
8463 void
8464 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8465 {
8466 while (num--)
8467 {
8468 rtx op = operands[num];
8469
8470 /* simplify_subreg refuses to split volatile memory references, but we
8471 still have to handle them. */
8472 if (GET_CODE (op) == MEM)
8473 {
8474 lo_half[num] = adjust_address (op, DImode, 0);
8475 hi_half[num] = adjust_address (op, DImode, 8);
8476 }
8477 else
8478 {
8479 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8480 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8481 }
8482 }
8483 }
8484 \f
8485 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8486 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8487 is the expression of the binary operation. The output may either be
8488 emitted here, or returned to the caller, like all output_* functions.
8489
8490 There is no guarantee that the operands are the same mode, as they
8491 might be within FLOAT or FLOAT_EXTEND expressions. */
8492
8493 #ifndef SYSV386_COMPAT
8494 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8495 wants to fix the assemblers because that causes incompatibility
8496 with gcc. No-one wants to fix gcc because that causes
8497 incompatibility with assemblers... You can use the option of
8498 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8499 #define SYSV386_COMPAT 1
8500 #endif
8501
8502 const char *
8503 output_387_binary_op (rtx insn, rtx *operands)
8504 {
8505 static char buf[30];
8506 const char *p;
8507 const char *ssep;
8508 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8509
8510 #ifdef ENABLE_CHECKING
8511 /* Even if we do not want to check the inputs, this documents the input
8512 constraints, which helps in understanding the following code. */
8513 if (STACK_REG_P (operands[0])
8514 && ((REG_P (operands[1])
8515 && REGNO (operands[0]) == REGNO (operands[1])
8516 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8517 || (REG_P (operands[2])
8518 && REGNO (operands[0]) == REGNO (operands[2])
8519 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8520 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8521 ; /* ok */
8522 else
8523 gcc_assert (is_sse);
8524 #endif
8525
8526 switch (GET_CODE (operands[3]))
8527 {
8528 case PLUS:
8529 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8530 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8531 p = "fiadd";
8532 else
8533 p = "fadd";
8534 ssep = "add";
8535 break;
8536
8537 case MINUS:
8538 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8539 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8540 p = "fisub";
8541 else
8542 p = "fsub";
8543 ssep = "sub";
8544 break;
8545
8546 case MULT:
8547 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8548 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8549 p = "fimul";
8550 else
8551 p = "fmul";
8552 ssep = "mul";
8553 break;
8554
8555 case DIV:
8556 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8557 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8558 p = "fidiv";
8559 else
8560 p = "fdiv";
8561 ssep = "div";
8562 break;
8563
8564 default:
8565 gcc_unreachable ();
8566 }
8567
8568 if (is_sse)
8569 {
8570 strcpy (buf, ssep);
8571 if (GET_MODE (operands[0]) == SFmode)
8572 strcat (buf, "ss\t{%2, %0|%0, %2}");
8573 else
8574 strcat (buf, "sd\t{%2, %0|%0, %2}");
8575 return buf;
8576 }
8577 strcpy (buf, p);
8578
8579 switch (GET_CODE (operands[3]))
8580 {
8581 case MULT:
8582 case PLUS:
8583 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8584 {
8585 rtx temp = operands[2];
8586 operands[2] = operands[1];
8587 operands[1] = temp;
8588 }
8589
8590 /* We know operands[0] == operands[1]. */
8591
8592 if (GET_CODE (operands[2]) == MEM)
8593 {
8594 p = "%z2\t%2";
8595 break;
8596 }
8597
8598 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8599 {
8600 if (STACK_TOP_P (operands[0]))
8601 /* How is it that we are storing to a dead operand[2]?
8602 Well, presumably operands[1] is dead too. We can't
8603 store the result to st(0) as st(0) gets popped on this
8604 instruction. Instead store to operands[2] (which I
8605 think has to be st(1)). st(1) will be popped later.
8606 gcc <= 2.8.1 didn't have this check and generated
8607 assembly code that the Unixware assembler rejected. */
8608 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8609 else
8610 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8611 break;
8612 }
8613
8614 if (STACK_TOP_P (operands[0]))
8615 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8616 else
8617 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8618 break;
8619
8620 case MINUS:
8621 case DIV:
8622 if (GET_CODE (operands[1]) == MEM)
8623 {
8624 p = "r%z1\t%1";
8625 break;
8626 }
8627
8628 if (GET_CODE (operands[2]) == MEM)
8629 {
8630 p = "%z2\t%2";
8631 break;
8632 }
8633
8634 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8635 {
8636 #if SYSV386_COMPAT
8637 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8638 derived assemblers, confusingly reverse the direction of
8639 the operation for fsub{r} and fdiv{r} when the
8640 destination register is not st(0). The Intel assembler
8641 doesn't have this brain damage. Read !SYSV386_COMPAT to
8642 figure out what the hardware really does. */
8643 if (STACK_TOP_P (operands[0]))
8644 p = "{p\t%0, %2|rp\t%2, %0}";
8645 else
8646 p = "{rp\t%2, %0|p\t%0, %2}";
8647 #else
8648 if (STACK_TOP_P (operands[0]))
8649 /* As above for fmul/fadd, we can't store to st(0). */
8650 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8651 else
8652 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8653 #endif
8654 break;
8655 }
8656
8657 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8658 {
8659 #if SYSV386_COMPAT
8660 if (STACK_TOP_P (operands[0]))
8661 p = "{rp\t%0, %1|p\t%1, %0}";
8662 else
8663 p = "{p\t%1, %0|rp\t%0, %1}";
8664 #else
8665 if (STACK_TOP_P (operands[0]))
8666 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8667 else
8668 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8669 #endif
8670 break;
8671 }
8672
8673 if (STACK_TOP_P (operands[0]))
8674 {
8675 if (STACK_TOP_P (operands[1]))
8676 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8677 else
8678 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8679 break;
8680 }
8681 else if (STACK_TOP_P (operands[1]))
8682 {
8683 #if SYSV386_COMPAT
8684 p = "{\t%1, %0|r\t%0, %1}";
8685 #else
8686 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8687 #endif
8688 }
8689 else
8690 {
8691 #if SYSV386_COMPAT
8692 p = "{r\t%2, %0|\t%0, %2}";
8693 #else
8694 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8695 #endif
8696 }
8697 break;
8698
8699 default:
8700 gcc_unreachable ();
8701 }
8702
8703 strcat (buf, p);
8704 return buf;
8705 }
8706
8707 /* Return needed mode for entity in optimize_mode_switching pass. */
8708
8709 int
8710 ix86_mode_needed (int entity, rtx insn)
8711 {
8712 enum attr_i387_cw mode;
8713
8714 /* The mode UNINITIALIZED is used to store the control word after a
8715 function call or ASM pattern. The mode ANY specifies that the insn
8716 has no requirements on the control word and makes no changes in the
8717 bits we are interested in. */
8718
8719 if (CALL_P (insn)
8720 || (NONJUMP_INSN_P (insn)
8721 && (asm_noperands (PATTERN (insn)) >= 0
8722 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8723 return I387_CW_UNINITIALIZED;
8724
8725 if (recog_memoized (insn) < 0)
8726 return I387_CW_ANY;
8727
8728 mode = get_attr_i387_cw (insn);
8729
8730 switch (entity)
8731 {
8732 case I387_TRUNC:
8733 if (mode == I387_CW_TRUNC)
8734 return mode;
8735 break;
8736
8737 case I387_FLOOR:
8738 if (mode == I387_CW_FLOOR)
8739 return mode;
8740 break;
8741
8742 case I387_CEIL:
8743 if (mode == I387_CW_CEIL)
8744 return mode;
8745 break;
8746
8747 case I387_MASK_PM:
8748 if (mode == I387_CW_MASK_PM)
8749 return mode;
8750 break;
8751
8752 default:
8753 gcc_unreachable ();
8754 }
8755
8756 return I387_CW_ANY;
8757 }
8758
8759 /* Output code to initialize the control word copies used by trunc?f?i
8760 and rounding patterns. MODE selects the rounding/masking variant;
8761 the initialized control word is stored in the matching stack slot. */
8762
8763 void
8764 emit_i387_cw_initialization (int mode)
8765 {
8766 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8767 rtx new_mode;
8768
8769 int slot;
8770
8771 rtx reg = gen_reg_rtx (HImode);
8772
8773 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8774 emit_move_insn (reg, copy_rtx (stored_mode));
8775
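/* Two ways of tweaking the rounding-control / precision-mask bits of the
copied control word: full 16-bit and/or arithmetic, or (when a partial
register write is acceptable) an insertion into the high byte of the
word via movsi_insv_1. */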
8776 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8777 {
8778 switch (mode)
8779 {
8780 case I387_CW_TRUNC:
8781 /* round toward zero (truncate) */
8782 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8783 slot = SLOT_CW_TRUNC;
8784 break;
8785
8786 case I387_CW_FLOOR:
8787 /* round down toward -oo */
8788 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8789 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8790 slot = SLOT_CW_FLOOR;
8791 break;
8792
8793 case I387_CW_CEIL:
8794 /* round up toward +oo */
8795 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8796 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8797 slot = SLOT_CW_CEIL;
8798 break;
8799
8800 case I387_CW_MASK_PM:
8801 /* mask precision exception for nearbyint() */
8802 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8803 slot = SLOT_CW_MASK_PM;
8804 break;
8805
8806 default:
8807 gcc_unreachable ();
8808 }
8809 }
8810 else
8811 {
8812 switch (mode)
8813 {
8814 case I387_CW_TRUNC:
8815 /* round toward zero (truncate) */
8816 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8817 slot = SLOT_CW_TRUNC;
8818 break;
8819
8820 case I387_CW_FLOOR:
8821 /* round down toward -oo */
8822 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8823 slot = SLOT_CW_FLOOR;
8824 break;
8825
8826 case I387_CW_CEIL:
8827 /* round up toward +oo */
8828 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8829 slot = SLOT_CW_CEIL;
8830 break;
8831
8832 case I387_CW_MASK_PM:
8833 /* mask precision exception for nearbyint() */
8834 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8835 slot = SLOT_CW_MASK_PM;
8836 break;
8837
8838 default:
8839 gcc_unreachable ();
8840 }
8841 }
8842
8843 gcc_assert (slot < MAX_386_STACK_LOCALS);
8844
8845 new_mode = assign_386_stack_local (HImode, slot);
8846 emit_move_insn (new_mode, reg);
8847 }
8848
8849 /* Output code for INSN to convert a float to a signed int. OPERANDS
8850 are the insn operands. The output may be [HSD]Imode and the input
8851 operand may be [SDX]Fmode. */
8852
8853 const char *
8854 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8855 {
8856 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8857 int dimode_p = GET_MODE (operands[0]) == DImode;
8858 int round_mode = get_attr_i387_cw (insn);
8859
8860 /* Jump through a hoop or two for DImode, since the hardware has no
8861 non-popping instruction. We used to do this a different way, but
8862 that was somewhat fragile and broke with post-reload splitters. */
8863 if ((dimode_p || fisttp) && !stack_top_dies)
8864 output_asm_insn ("fld\t%y1", operands);
8865
8866 gcc_assert (STACK_TOP_P (operands[1]));
8867 gcc_assert (GET_CODE (operands[0]) == MEM);
8868
8869 if (fisttp)
8870 output_asm_insn ("fisttp%z0\t%0", operands);
8871 else
8872 {
8873 if (round_mode != I387_CW_ANY)
8874 output_asm_insn ("fldcw\t%3", operands);
8875 if (stack_top_dies || dimode_p)
8876 output_asm_insn ("fistp%z0\t%0", operands);
8877 else
8878 output_asm_insn ("fist%z0\t%0", operands);
8879 if (round_mode != I387_CW_ANY)
8880 output_asm_insn ("fldcw\t%2", operands);
8881 }
8882
8883 return "";
8884 }
8885
8886 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8887 have the values zero or one, indicates the ffreep insn's operand
8888 from the OPERANDS array. */
8889
8890 static const char *
8891 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8892 {
8893 if (TARGET_USE_FFREEP)
8894 #if HAVE_AS_IX86_FFREEP
8895 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8896 #else
8897 {
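/* The assembler does not know the ffreep mnemonic here, so emit its
   encoding directly: opcode DF C0+i as a little-endian .word, with
   the '_' placeholder patched to the stack register number below.  */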
8898 static char retval[] = ".word\t0xc_df";
8899 int regno = REGNO (operands[opno]);
8900
8901 gcc_assert (FP_REGNO_P (regno));
8902
8903 retval[9] = '0' + (regno - FIRST_STACK_REG);
8904 return retval;
8905 }
8906 #endif
8907
8908 return opno ? "fstp\t%y1" : "fstp\t%y0";
8909 }
8910
8911
8912 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8913 should be used. UNORDERED_P is true when fucom should be used. */
8914
8915 const char *
8916 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8917 {
8918 int stack_top_dies;
8919 rtx cmp_op0, cmp_op1;
8920 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8921
8922 if (eflags_p)
8923 {
8924 cmp_op0 = operands[0];
8925 cmp_op1 = operands[1];
8926 }
8927 else
8928 {
8929 cmp_op0 = operands[1];
8930 cmp_op1 = operands[2];
8931 }
8932
8933 if (is_sse)
8934 {
8935 if (GET_MODE (operands[0]) == SFmode)
8936 if (unordered_p)
8937 return "ucomiss\t{%1, %0|%0, %1}";
8938 else
8939 return "comiss\t{%1, %0|%0, %1}";
8940 else
8941 if (unordered_p)
8942 return "ucomisd\t{%1, %0|%0, %1}";
8943 else
8944 return "comisd\t{%1, %0|%0, %1}";
8945 }
8946
8947 gcc_assert (STACK_TOP_P (cmp_op0));
8948
8949 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8950
8951 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8952 {
8953 if (stack_top_dies)
8954 {
8955 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8956 return output_387_ffreep (operands, 1);
8957 }
8958 else
8959 return "ftst\n\tfnstsw\t%0";
8960 }
8961
8962 if (STACK_REG_P (cmp_op1)
8963 && stack_top_dies
8964 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8965 && REGNO (cmp_op1) != FIRST_STACK_REG)
8966 {
8967 /* If both the top of the 387 stack and the other operand (also a
8968 stack register) die, then this must be a `fcompp' float
8969 compare.  */
8970
8971 if (eflags_p)
8972 {
8973 /* There is no double popping fcomi variant. Fortunately,
8974 eflags is immune from the fstp's cc clobbering. */
8975 if (unordered_p)
8976 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8977 else
8978 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8979 return output_387_ffreep (operands, 0);
8980 }
8981 else
8982 {
8983 if (unordered_p)
8984 return "fucompp\n\tfnstsw\t%0";
8985 else
8986 return "fcompp\n\tfnstsw\t%0";
8987 }
8988 }
8989 else
8990 {
8991 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8992
8993 static const char * const alt[16] =
8994 {
8995 "fcom%z2\t%y2\n\tfnstsw\t%0",
8996 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8997 "fucom%z2\t%y2\n\tfnstsw\t%0",
8998 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8999
9000 "ficom%z2\t%y2\n\tfnstsw\t%0",
9001 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9002 NULL,
9003 NULL,
9004
9005 "fcomi\t{%y1, %0|%0, %y1}",
9006 "fcomip\t{%y1, %0|%0, %y1}",
9007 "fucomi\t{%y1, %0|%0, %y1}",
9008 "fucomip\t{%y1, %0|%0, %y1}",
9009
9010 NULL,
9011 NULL,
9012 NULL,
9013 NULL
9014 };
9015
9016 int mask;
9017 const char *ret;
9018
9019 mask = eflags_p << 3;
9020 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9021 mask |= unordered_p << 1;
9022 mask |= stack_top_dies;
9023
9024 gcc_assert (mask < 16);
9025 ret = alt[mask];
9026 gcc_assert (ret);
9027
9028 return ret;
9029 }
9030 }
9031
9032 void
9033 ix86_output_addr_vec_elt (FILE *file, int value)
9034 {
9035 const char *directive = ASM_LONG;
9036
9037 #ifdef ASM_QUAD
9038 if (TARGET_64BIT)
9039 directive = ASM_QUAD;
9040 #else
9041 gcc_assert (!TARGET_64BIT);
9042 #endif
9043
9044 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9045 }
9046
9047 void
9048 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9049 {
9050 if (TARGET_64BIT)
9051 fprintf (file, "%s%s%d-%s%d\n",
9052 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9053 else if (HAVE_AS_GOTOFF_IN_DATA)
9054 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9055 #if TARGET_MACHO
9056 else if (TARGET_MACHO)
9057 {
9058 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9059 machopic_output_function_base_name (file);
9060 fprintf(file, "\n");
9061 }
9062 #endif
9063 else
9064 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9065 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9066 }
9067 \f
9068 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9069 for the target. */
9070
9071 void
9072 ix86_expand_clear (rtx dest)
9073 {
9074 rtx tmp;
9075
9076 /* We play register width games, which are only valid after reload. */
9077 gcc_assert (reload_completed);
9078
9079 /* Avoid HImode and its attendant prefix byte. */
9080 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9081 dest = gen_rtx_REG (SImode, REGNO (dest));
9082
9083 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9084
9085 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9086 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9087 {
9088 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9089 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9090 }
9091
9092 emit_insn (tmp);
9093 }
9094
9095 /* X is an unchanging MEM. If it is a constant pool reference, return
9096 the constant pool rtx, else NULL. */
9097
9098 rtx
9099 maybe_get_pool_constant (rtx x)
9100 {
9101 x = ix86_delegitimize_address (XEXP (x, 0));
9102
9103 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9104 return get_pool_constant (x);
9105
9106 return NULL_RTX;
9107 }
9108
9109 void
9110 ix86_expand_move (enum machine_mode mode, rtx operands[])
9111 {
9112 int strict = (reload_in_progress || reload_completed);
9113 rtx op0, op1;
9114 enum tls_model model;
9115
9116 op0 = operands[0];
9117 op1 = operands[1];
9118
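/* Legitimize TLS symbol references (and constant offsets from them)
   first, so that the move emitted below only sees an address form
   the rest of the compiler can handle.  */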
9119 if (GET_CODE (op1) == SYMBOL_REF)
9120 {
9121 model = SYMBOL_REF_TLS_MODEL (op1);
9122 if (model)
9123 {
9124 op1 = legitimize_tls_address (op1, model, true);
9125 op1 = force_operand (op1, op0);
9126 if (op1 == op0)
9127 return;
9128 }
9129 }
9130 else if (GET_CODE (op1) == CONST
9131 && GET_CODE (XEXP (op1, 0)) == PLUS
9132 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9133 {
9134 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9135 if (model)
9136 {
9137 rtx addend = XEXP (XEXP (op1, 0), 1);
9138 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9139 op1 = force_operand (op1, NULL);
9140 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9141 op0, 1, OPTAB_DIRECT);
9142 if (op1 == op0)
9143 return;
9144 }
9145 }
9146
9147 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9148 {
9149 if (TARGET_MACHO && !TARGET_64BIT)
9150 {
9151 #if TARGET_MACHO
9152 if (MACHOPIC_PURE)
9153 {
9154 rtx temp = ((reload_in_progress
9155 || ((op0 && GET_CODE (op0) == REG)
9156 && mode == Pmode))
9157 ? op0 : gen_reg_rtx (Pmode));
9158 op1 = machopic_indirect_data_reference (op1, temp);
9159 op1 = machopic_legitimize_pic_address (op1, mode,
9160 temp == op1 ? 0 : temp);
9161 }
9162 else if (MACHOPIC_INDIRECT)
9163 op1 = machopic_indirect_data_reference (op1, 0);
9164 if (op0 == op1)
9165 return;
9166 #endif
9167 }
9168 else
9169 {
9170 if (GET_CODE (op0) == MEM)
9171 op1 = force_reg (Pmode, op1);
9172 else
9173 op1 = legitimize_address (op1, op1, Pmode);
9174 }
9175 }
9176 else
9177 {
9178 if (GET_CODE (op0) == MEM
9179 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9180 || !push_operand (op0, mode))
9181 && GET_CODE (op1) == MEM)
9182 op1 = force_reg (mode, op1);
9183
9184 if (push_operand (op0, mode)
9185 && ! general_no_elim_operand (op1, mode))
9186 op1 = copy_to_mode_reg (mode, op1);
9187
9188 /* Force large constants in 64-bit compilation into a register
9189 to get them CSEd. */
9190 if (TARGET_64BIT && mode == DImode
9191 && immediate_operand (op1, mode)
9192 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9193 && !register_operand (op0, mode)
9194 && optimize && !reload_completed && !reload_in_progress)
9195 op1 = copy_to_mode_reg (mode, op1);
9196
9197 if (FLOAT_MODE_P (mode))
9198 {
9199 /* If we are loading a floating point constant to a register,
9200 force the value to memory now, since we'll get better code
9201 out of the back end. */
9202
9203 if (strict)
9204 ;
9205 else if (GET_CODE (op1) == CONST_DOUBLE)
9206 {
9207 op1 = validize_mem (force_const_mem (mode, op1));
9208 if (!register_operand (op0, mode))
9209 {
9210 rtx temp = gen_reg_rtx (mode);
9211 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9212 emit_move_insn (op0, temp);
9213 return;
9214 }
9215 }
9216 }
9217 }
9218
9219 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9220 }
9221
9222 void
9223 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9224 {
9225 rtx op0 = operands[0], op1 = operands[1];
9226
9227 /* Force constants other than zero into memory.  We do not know how
9228 the instructions used to build constants modify the upper 64 bits
9229 of the register; once we have that information we may be able
9230 to handle some of them more efficiently. */
9231 if ((reload_in_progress | reload_completed) == 0
9232 && register_operand (op0, mode)
9233 && CONSTANT_P (op1)
9234 && standard_sse_constant_p (op1) <= 0)
9235 op1 = validize_mem (force_const_mem (mode, op1));
9236
9237 /* Make operand1 a register if it isn't already. */
9238 if (!no_new_pseudos
9239 && !register_operand (op0, mode)
9240 && !register_operand (op1, mode))
9241 {
9242 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9243 return;
9244 }
9245
9246 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9247 }
9248
9249 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9250 straight to ix86_expand_vector_move. */
9251
9252 void
9253 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9254 {
9255 rtx op0, op1, m;
9256
9257 op0 = operands[0];
9258 op1 = operands[1];
9259
9260 if (MEM_P (op1))
9261 {
9262 /* If we're optimizing for size, movups is the smallest. */
9263 if (optimize_size)
9264 {
9265 op0 = gen_lowpart (V4SFmode, op0);
9266 op1 = gen_lowpart (V4SFmode, op1);
9267 emit_insn (gen_sse_movups (op0, op1));
9268 return;
9269 }
9270
9271 /* ??? If we have typed data, then it would appear that using
9272 movdqu is the only way to get unaligned data loaded with
9273 integer type. */
9274 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9275 {
9276 op0 = gen_lowpart (V16QImode, op0);
9277 op1 = gen_lowpart (V16QImode, op1);
9278 emit_insn (gen_sse2_movdqu (op0, op1));
9279 return;
9280 }
9281
9282 if (TARGET_SSE2 && mode == V2DFmode)
9283 {
9284 rtx zero;
9285
9286 /* When SSE registers are split into halves, we can avoid
9287 writing to the top half twice. */
9288 if (TARGET_SSE_SPLIT_REGS)
9289 {
9290 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9291 zero = op0;
9292 }
9293 else
9294 {
9295 /* ??? Not sure about the best option for the Intel chips.
9296 The following would seem to satisfy; the register is
9297 entirely cleared, breaking the dependency chain. We
9298 then store to the upper half, with a dependency depth
9299 of one. A rumor has it that Intel recommends two movsd
9300 followed by an unpacklpd, but this is unconfirmed. And
9301 given that the dependency depth of the unpacklpd would
9302 still be one, I'm not sure why this would be better. */
9303 zero = CONST0_RTX (V2DFmode);
9304 }
9305
9306 m = adjust_address (op1, DFmode, 0);
9307 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9308 m = adjust_address (op1, DFmode, 8);
9309 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9310 }
9311 else
9312 {
9313 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9314 emit_move_insn (op0, CONST0_RTX (mode));
9315 else
9316 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9317
9318 if (mode != V4SFmode)
9319 op0 = gen_lowpart (V4SFmode, op0);
9320 m = adjust_address (op1, V2SFmode, 0);
9321 emit_insn (gen_sse_loadlps (op0, op0, m));
9322 m = adjust_address (op1, V2SFmode, 8);
9323 emit_insn (gen_sse_loadhps (op0, op0, m));
9324 }
9325 }
9326 else if (MEM_P (op0))
9327 {
9328 /* If we're optimizing for size, movups is the smallest. */
9329 if (optimize_size)
9330 {
9331 op0 = gen_lowpart (V4SFmode, op0);
9332 op1 = gen_lowpart (V4SFmode, op1);
9333 emit_insn (gen_sse_movups (op0, op1));
9334 return;
9335 }
9336
9337 /* ??? Similar to above, only less clear because of quote
9338 typeless stores unquote. */
9339 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9340 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9341 {
9342 op0 = gen_lowpart (V16QImode, op0);
9343 op1 = gen_lowpart (V16QImode, op1);
9344 emit_insn (gen_sse2_movdqu (op0, op1));
9345 return;
9346 }
9347
9348 if (TARGET_SSE2 && mode == V2DFmode)
9349 {
9350 m = adjust_address (op0, DFmode, 0);
9351 emit_insn (gen_sse2_storelpd (m, op1));
9352 m = adjust_address (op0, DFmode, 8);
9353 emit_insn (gen_sse2_storehpd (m, op1));
9354 }
9355 else
9356 {
9357 if (mode != V4SFmode)
9358 op1 = gen_lowpart (V4SFmode, op1);
9359 m = adjust_address (op0, V2SFmode, 0);
9360 emit_insn (gen_sse_storelps (m, op1));
9361 m = adjust_address (op0, V2SFmode, 8);
9362 emit_insn (gen_sse_storehps (m, op1));
9363 }
9364 }
9365 else
9366 gcc_unreachable ();
9367 }
9368
9369 /* Expand a push in MODE. This is some mode for which we do not support
9370 proper push instructions, at least from the registers that we expect
9371 the value to live in. */
9372
9373 void
9374 ix86_expand_push (enum machine_mode mode, rtx x)
9375 {
9376 rtx tmp;
9377
9378 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9379 GEN_INT (-GET_MODE_SIZE (mode)),
9380 stack_pointer_rtx, 1, OPTAB_DIRECT);
9381 if (tmp != stack_pointer_rtx)
9382 emit_move_insn (stack_pointer_rtx, tmp);
9383
9384 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9385 emit_move_insn (tmp, x);
9386 }
9387
9388 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9389 destination to use for the operation. If different from the true
9390 destination in operands[0], a copy operation will be required. */
9391
9392 rtx
9393 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9394 rtx operands[])
9395 {
9396 int matching_memory;
9397 rtx src1, src2, dst;
9398
9399 dst = operands[0];
9400 src1 = operands[1];
9401 src2 = operands[2];
9402
9403 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9404 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9405 && (rtx_equal_p (dst, src2)
9406 || immediate_operand (src1, mode)))
9407 {
9408 rtx temp = src1;
9409 src1 = src2;
9410 src2 = temp;
9411 }
9412
9413 /* If the destination is memory, and we do not have matching source
9414 operands, do things in registers. */
9415 matching_memory = 0;
9416 if (GET_CODE (dst) == MEM)
9417 {
9418 if (rtx_equal_p (dst, src1))
9419 matching_memory = 1;
9420 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9421 && rtx_equal_p (dst, src2))
9422 matching_memory = 2;
9423 else
9424 dst = gen_reg_rtx (mode);
9425 }
9426
9427 /* The source operands cannot both be in memory. */
9428 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9429 {
9430 if (matching_memory != 2)
9431 src2 = force_reg (mode, src2);
9432 else
9433 src1 = force_reg (mode, src1);
9434 }
9435
9436 /* If the operation is not commutable, source 1 cannot be a constant
9437 or non-matching memory. */
9438 if ((CONSTANT_P (src1)
9439 || (!matching_memory && GET_CODE (src1) == MEM))
9440 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9441 src1 = force_reg (mode, src1);
9442
9443 src1 = operands[1] = src1;
9444 src2 = operands[2] = src2;
9445 return dst;
9446 }
9447
9448 /* Similarly, but assume that the destination has already been
9449 set up properly. */
9450
9451 void
9452 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9453 enum machine_mode mode, rtx operands[])
9454 {
9455 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9456 gcc_assert (dst == operands[0]);
9457 }
9458
9459 /* Attempt to expand a binary operator.  Make the expansion closer to the
9460 actual machine than just general_operand, which would allow 3 separate
9461 memory references (one output, two input) in a single insn.  */
9462
9463 void
9464 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9465 rtx operands[])
9466 {
9467 rtx src1, src2, dst, op, clob;
9468
9469 dst = ix86_fixup_binary_operands (code, mode, operands);
9470 src1 = operands[1];
9471 src2 = operands[2];
9472
9473 /* Emit the instruction. */
9474
9475 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9476 if (reload_in_progress)
9477 {
9478 /* Reload doesn't know about the flags register, and doesn't know that
9479 it doesn't want to clobber it. We can only do this with PLUS. */
9480 gcc_assert (code == PLUS);
9481 emit_insn (op);
9482 }
9483 else
9484 {
9485 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9486 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9487 }
9488
9489 /* Fix up the destination if needed. */
9490 if (dst != operands[0])
9491 emit_move_insn (operands[0], dst);
9492 }
9493
9494 /* Return TRUE or FALSE depending on whether the binary operator meets the
9495 appropriate constraints. */
9496
9497 int
9498 ix86_binary_operator_ok (enum rtx_code code,
9499 enum machine_mode mode ATTRIBUTE_UNUSED,
9500 rtx operands[3])
9501 {
9502 /* The source operands cannot both be in memory. */
9503 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9504 return 0;
9505 /* If the operation is not commutable, source 1 cannot be a constant. */
9506 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9507 return 0;
9508 /* If the destination is memory, we must have a matching source operand. */
9509 if (GET_CODE (operands[0]) == MEM
9510 && ! (rtx_equal_p (operands[0], operands[1])
9511 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9512 && rtx_equal_p (operands[0], operands[2]))))
9513 return 0;
9514 /* If the operation is not commutable and the source 1 is memory, we must
9515 have a matching destination. */
9516 if (GET_CODE (operands[1]) == MEM
9517 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9518 && ! rtx_equal_p (operands[0], operands[1]))
9519 return 0;
9520 return 1;
9521 }
9522
9523 /* Attempt to expand a unary operator.  Make the expansion closer to the
9524 actual machine than just general_operand, which would allow 2 separate
9525 memory references (one output, one input) in a single insn.  */
9526
9527 void
9528 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9529 rtx operands[])
9530 {
9531 int matching_memory;
9532 rtx src, dst, op, clob;
9533
9534 dst = operands[0];
9535 src = operands[1];
9536
9537 /* If the destination is memory, and we do not have matching source
9538 operands, do things in registers. */
9539 matching_memory = 0;
9540 if (MEM_P (dst))
9541 {
9542 if (rtx_equal_p (dst, src))
9543 matching_memory = 1;
9544 else
9545 dst = gen_reg_rtx (mode);
9546 }
9547
9548 /* When the source operand is in memory, the destination must match. */
9549 if (MEM_P (src) && !matching_memory)
9550 src = force_reg (mode, src);
9551
9552 /* Emit the instruction. */
9553
9554 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9555 if (reload_in_progress || code == NOT)
9556 {
9557 /* Reload doesn't know about the flags register, and doesn't know that
9558 it doesn't want to clobber it. */
9559 gcc_assert (code == NOT);
9560 emit_insn (op);
9561 }
9562 else
9563 {
9564 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9565 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9566 }
9567
9568 /* Fix up the destination if needed. */
9569 if (dst != operands[0])
9570 emit_move_insn (operands[0], dst);
9571 }
9572
9573 /* Return TRUE or FALSE depending on whether the unary operator meets the
9574 appropriate constraints. */
9575
9576 int
9577 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9578 enum machine_mode mode ATTRIBUTE_UNUSED,
9579 rtx operands[2] ATTRIBUTE_UNUSED)
9580 {
9581 /* If one of operands is memory, source and destination must match. */
9582 if ((GET_CODE (operands[0]) == MEM
9583 || GET_CODE (operands[1]) == MEM)
9584 && ! rtx_equal_p (operands[0], operands[1]))
9585 return FALSE;
9586 return TRUE;
9587 }
9588
9589 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9590 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9591 true, then replicate the mask for all elements of the vector register.
9592 If INVERT is true, then create a mask excluding the sign bit. */
9593
9594 rtx
9595 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9596 {
9597 enum machine_mode vec_mode;
9598 HOST_WIDE_INT hi, lo;
9599 int shift = 63;
9600 rtvec v;
9601 rtx mask;
9602
9603 /* Find the sign bit, sign extended to 2*HWI. */
9604 if (mode == SFmode)
9605 lo = 0x80000000, hi = lo < 0;
9606 else if (HOST_BITS_PER_WIDE_INT >= 64)
9607 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9608 else
9609 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
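/* For SFmode this is bit 31 (0x80000000); for DFmode it is bit 63,
   which lands in HI when HOST_WIDE_INT is only 32 bits wide.  */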
9610
9611 if (invert)
9612 lo = ~lo, hi = ~hi;
9613
9614 /* Force this value into the low part of a fp vector constant. */
9615 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9616 mask = gen_lowpart (mode, mask);
9617
9618 if (mode == SFmode)
9619 {
9620 if (vect)
9621 v = gen_rtvec (4, mask, mask, mask, mask);
9622 else
9623 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9624 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9625 vec_mode = V4SFmode;
9626 }
9627 else
9628 {
9629 if (vect)
9630 v = gen_rtvec (2, mask, mask);
9631 else
9632 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9633 vec_mode = V2DFmode;
9634 }
9635
9636 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9637 }
9638
9639 /* Generate code for floating point ABS or NEG. */
9640
9641 void
9642 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9643 rtx operands[])
9644 {
9645 rtx mask, set, use, clob, dst, src;
9646 bool matching_memory;
9647 bool use_sse = false;
9648 bool vector_mode = VECTOR_MODE_P (mode);
9649 enum machine_mode elt_mode = mode;
9650
9651 if (vector_mode)
9652 {
9653 elt_mode = GET_MODE_INNER (mode);
9654 use_sse = true;
9655 }
9656 else if (TARGET_SSE_MATH)
9657 use_sse = SSE_FLOAT_MODE_P (mode);
9658
9659 /* NEG and ABS performed with SSE use bitwise mask operations.
9660 Create the appropriate mask now. */
9661 if (use_sse)
9662 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9663 else
9664 mask = NULL_RTX;
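/* With SSE, NEG is an XOR with the sign-bit mask and ABS is an AND
   with its complement, which is why the mask is inverted for ABS.  */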
9665
9666 dst = operands[0];
9667 src = operands[1];
9668
9669 /* If the destination is memory, and we don't have matching source
9670 operands or we're using the x87, do things in registers. */
9671 matching_memory = false;
9672 if (MEM_P (dst))
9673 {
9674 if (use_sse && rtx_equal_p (dst, src))
9675 matching_memory = true;
9676 else
9677 dst = gen_reg_rtx (mode);
9678 }
9679 if (MEM_P (src) && !matching_memory)
9680 src = force_reg (mode, src);
9681
9682 if (vector_mode)
9683 {
9684 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9685 set = gen_rtx_SET (VOIDmode, dst, set);
9686 emit_insn (set);
9687 }
9688 else
9689 {
9690 set = gen_rtx_fmt_e (code, mode, src);
9691 set = gen_rtx_SET (VOIDmode, dst, set);
9692 if (mask)
9693 {
9694 use = gen_rtx_USE (VOIDmode, mask);
9695 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9696 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9697 gen_rtvec (3, set, use, clob)));
9698 }
9699 else
9700 emit_insn (set);
9701 }
9702
9703 if (dst != operands[0])
9704 emit_move_insn (operands[0], dst);
9705 }
9706
9707 /* Expand a copysign operation. Special case operand 0 being a constant. */
9708
9709 void
9710 ix86_expand_copysign (rtx operands[])
9711 {
9712 enum machine_mode mode, vmode;
9713 rtx dest, op0, op1, mask, nmask;
9714
9715 dest = operands[0];
9716 op0 = operands[1];
9717 op1 = operands[2];
9718
9719 mode = GET_MODE (dest);
9720 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9721
9722 if (GET_CODE (op0) == CONST_DOUBLE)
9723 {
9724 rtvec v;
9725
9726 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9727 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9728
9729 if (op0 == CONST0_RTX (mode))
9730 op0 = CONST0_RTX (vmode);
9731 else
9732 {
9733 if (mode == SFmode)
9734 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9735 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9736 else
9737 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9738 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9739 }
9740
9741 mask = ix86_build_signbit_mask (mode, 0, 0);
9742
9743 if (mode == SFmode)
9744 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9745 else
9746 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9747 }
9748 else
9749 {
9750 nmask = ix86_build_signbit_mask (mode, 0, 1);
9751 mask = ix86_build_signbit_mask (mode, 0, 0);
9752
9753 if (mode == SFmode)
9754 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9755 else
9756 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9757 }
9758 }
9759
9760 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9761 be a constant, and so has already been expanded into a vector constant. */
9762
9763 void
9764 ix86_split_copysign_const (rtx operands[])
9765 {
9766 enum machine_mode mode, vmode;
9767 rtx dest, op0, op1, mask, x;
9768
9769 dest = operands[0];
9770 op0 = operands[1];
9771 op1 = operands[2];
9772 mask = operands[3];
9773
9774 mode = GET_MODE (dest);
9775 vmode = GET_MODE (mask);
9776
9777 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9778 x = gen_rtx_AND (vmode, dest, mask);
9779 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9780
9781 if (op0 != CONST0_RTX (vmode))
9782 {
9783 x = gen_rtx_IOR (vmode, dest, op0);
9784 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9785 }
9786 }
9787
9788 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9789 so we have to do two masks. */
9790
9791 void
9792 ix86_split_copysign_var (rtx operands[])
9793 {
9794 enum machine_mode mode, vmode;
9795 rtx dest, scratch, op0, op1, mask, nmask, x;
9796
9797 dest = operands[0];
9798 scratch = operands[1];
9799 op0 = operands[2];
9800 op1 = operands[3];
9801 nmask = operands[4];
9802 mask = operands[5];
9803
9804 mode = GET_MODE (dest);
9805 vmode = GET_MODE (mask);
9806
9807 if (rtx_equal_p (op0, op1))
9808 {
9809 /* Shouldn't happen often (it's useless, obviously), but when it does
9810 we'd generate incorrect code if we continue below. */
9811 emit_move_insn (dest, op0);
9812 return;
9813 }
9814
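/* Otherwise compute dest = (op0 & ~signmask) | (op1 & signmask);
   the alternatives below only differ in which input shares a hard
   register with DEST or SCRATCH.  */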
9815 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9816 {
9817 gcc_assert (REGNO (op1) == REGNO (scratch));
9818
9819 x = gen_rtx_AND (vmode, scratch, mask);
9820 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9821
9822 dest = mask;
9823 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9824 x = gen_rtx_NOT (vmode, dest);
9825 x = gen_rtx_AND (vmode, x, op0);
9826 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9827 }
9828 else
9829 {
9830 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9831 {
9832 x = gen_rtx_AND (vmode, scratch, mask);
9833 }
9834 else /* alternative 2,4 */
9835 {
9836 gcc_assert (REGNO (mask) == REGNO (scratch));
9837 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9838 x = gen_rtx_AND (vmode, scratch, op1);
9839 }
9840 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9841
9842 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9843 {
9844 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9845 x = gen_rtx_AND (vmode, dest, nmask);
9846 }
9847 else /* alternative 3,4 */
9848 {
9849 gcc_assert (REGNO (nmask) == REGNO (dest));
9850 dest = nmask;
9851 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9852 x = gen_rtx_AND (vmode, dest, op0);
9853 }
9854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9855 }
9856
9857 x = gen_rtx_IOR (vmode, dest, scratch);
9858 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9859 }
9860
9861 /* Return TRUE or FALSE depending on whether the first SET in INSN
9862 has source and destination with matching CC modes, and that the
9863 CC mode is at least as constrained as REQ_MODE. */
9864
9865 int
9866 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9867 {
9868 rtx set;
9869 enum machine_mode set_mode;
9870
9871 set = PATTERN (insn);
9872 if (GET_CODE (set) == PARALLEL)
9873 set = XVECEXP (set, 0, 0);
9874 gcc_assert (GET_CODE (set) == SET);
9875 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9876
9877 set_mode = GET_MODE (SET_DEST (set));
9878 switch (set_mode)
9879 {
9880 case CCNOmode:
9881 if (req_mode != CCNOmode
9882 && (req_mode != CCmode
9883 || XEXP (SET_SRC (set), 1) != const0_rtx))
9884 return 0;
9885 break;
9886 case CCmode:
9887 if (req_mode == CCGCmode)
9888 return 0;
9889 /* FALLTHRU */
9890 case CCGCmode:
9891 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9892 return 0;
9893 /* FALLTHRU */
9894 case CCGOCmode:
9895 if (req_mode == CCZmode)
9896 return 0;
9897 /* FALLTHRU */
9898 case CCZmode:
9899 break;
9900
9901 default:
9902 gcc_unreachable ();
9903 }
9904
9905 return (GET_MODE (SET_SRC (set)) == set_mode);
9906 }
9907
9908 /* Generate insn patterns to do an integer compare of OPERANDS. */
9909
9910 static rtx
9911 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9912 {
9913 enum machine_mode cmpmode;
9914 rtx tmp, flags;
9915
9916 cmpmode = SELECT_CC_MODE (code, op0, op1);
9917 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9918
9919 /* This is very simple, but making the interface the same as in the
9920 FP case makes the rest of the code easier. */
9921 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9922 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9923
9924 /* Return the test that should be put into the flags user, i.e.
9925 the bcc, scc, or cmov instruction. */
9926 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9927 }
9928
9929 /* Figure out whether to use ordered or unordered fp comparisons.
9930 Return the appropriate mode to use. */
9931
9932 enum machine_mode
9933 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9934 {
9935 /* ??? In order to make all comparisons reversible, we do all comparisons
9936 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
9937 between trapping and nontrapping forms of all comparisons, we can make
9938 inequality comparisons trapping again, since that results in better code
9939 when using FCOM based compares. */
9940 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9941 }
9942
9943 enum machine_mode
9944 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9945 {
9946 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9947 return ix86_fp_compare_mode (code);
9948 switch (code)
9949 {
9950 /* Only zero flag is needed. */
9951 case EQ: /* ZF=0 */
9952 case NE: /* ZF!=0 */
9953 return CCZmode;
9954 /* Codes needing carry flag. */
9955 case GEU: /* CF=0 */
9956 case GTU: /* CF=0 & ZF=0 */
9957 case LTU: /* CF=1 */
9958 case LEU: /* CF=1 | ZF=1 */
9959 return CCmode;
9960 /* Codes possibly doable only with sign flag when
9961 comparing against zero. */
9962 case GE: /* SF=OF or SF=0 */
9963 case LT: /* SF<>OF or SF=1 */
9964 if (op1 == const0_rtx)
9965 return CCGOCmode;
9966 else
9967 /* For other cases Carry flag is not required. */
9968 return CCGCmode;
9969 /* Codes doable only with the sign flag when comparing
9970 against zero, but we lack a jump instruction for that,
9971 so we need to use relational tests against overflow,
9972 which thus needs to be zero. */
9973 case GT: /* ZF=0 & SF=OF */
9974 case LE: /* ZF=1 | SF<>OF */
9975 if (op1 == const0_rtx)
9976 return CCNOmode;
9977 else
9978 return CCGCmode;
9979 /* The strcmp pattern does (use flags), and combine may ask us for the
9980 proper mode. */
9981 case USE:
9982 return CCmode;
9983 default:
9984 gcc_unreachable ();
9985 }
9986 }
9987
9988 /* Return the fixed registers used for condition codes. */
9989
9990 static bool
9991 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9992 {
9993 *p1 = FLAGS_REG;
9994 *p2 = FPSR_REG;
9995 return true;
9996 }
9997
9998 /* If two condition code modes are compatible, return a condition code
9999 mode which is compatible with both. Otherwise, return
10000 VOIDmode. */
10001
10002 static enum machine_mode
10003 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10004 {
10005 if (m1 == m2)
10006 return m1;
10007
10008 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10009 return VOIDmode;
10010
10011 if ((m1 == CCGCmode && m2 == CCGOCmode)
10012 || (m1 == CCGOCmode && m2 == CCGCmode))
10013 return CCGCmode;
10014
10015 switch (m1)
10016 {
10017 default:
10018 gcc_unreachable ();
10019
10020 case CCmode:
10021 case CCGCmode:
10022 case CCGOCmode:
10023 case CCNOmode:
10024 case CCZmode:
10025 switch (m2)
10026 {
10027 default:
10028 return VOIDmode;
10029
10030 case CCmode:
10031 case CCGCmode:
10032 case CCGOCmode:
10033 case CCNOmode:
10034 case CCZmode:
10035 return CCmode;
10036 }
10037
10038 case CCFPmode:
10039 case CCFPUmode:
10040 /* These are only compatible with themselves, which we already
10041 checked above. */
10042 return VOIDmode;
10043 }
10044 }
10045
10046 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10047
10048 int
10049 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10050 {
10051 enum rtx_code swapped_code = swap_condition (code);
10052 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10053 || (ix86_fp_comparison_cost (swapped_code)
10054 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10055 }
10056
10057 /* Swap, force into registers, or otherwise massage the two operands
10058 to a fp comparison. The operands are updated in place; the new
10059 comparison code is returned. */
10060
10061 static enum rtx_code
10062 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10063 {
10064 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10065 rtx op0 = *pop0, op1 = *pop1;
10066 enum machine_mode op_mode = GET_MODE (op0);
10067 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10068
10069 /* All of the unordered compare instructions only work on registers.
10070 The same is true of the fcomi compare instructions. The XFmode
10071 compare instructions require registers except when comparing
10072 against zero or when converting operand 1 from fixed point to
10073 floating point. */
10074
10075 if (!is_sse
10076 && (fpcmp_mode == CCFPUmode
10077 || (op_mode == XFmode
10078 && ! (standard_80387_constant_p (op0) == 1
10079 || standard_80387_constant_p (op1) == 1)
10080 && GET_CODE (op1) != FLOAT)
10081 || ix86_use_fcomi_compare (code)))
10082 {
10083 op0 = force_reg (op_mode, op0);
10084 op1 = force_reg (op_mode, op1);
10085 }
10086 else
10087 {
10088 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10089 things around if they appear profitable, otherwise force op0
10090 into a register. */
10091
10092 if (standard_80387_constant_p (op0) == 0
10093 || (GET_CODE (op0) == MEM
10094 && ! (standard_80387_constant_p (op1) == 0
10095 || GET_CODE (op1) == MEM)))
10096 {
10097 rtx tmp;
10098 tmp = op0, op0 = op1, op1 = tmp;
10099 code = swap_condition (code);
10100 }
10101
10102 if (GET_CODE (op0) != REG)
10103 op0 = force_reg (op_mode, op0);
10104
10105 if (CONSTANT_P (op1))
10106 {
10107 int tmp = standard_80387_constant_p (op1);
10108 if (tmp == 0)
10109 op1 = validize_mem (force_const_mem (op_mode, op1));
10110 else if (tmp == 1)
10111 {
10112 if (TARGET_CMOVE)
10113 op1 = force_reg (op_mode, op1);
10114 }
10115 else
10116 op1 = force_reg (op_mode, op1);
10117 }
10118 }
10119
10120 /* Try to rearrange the comparison to make it cheaper. */
10121 if (ix86_fp_comparison_cost (code)
10122 > ix86_fp_comparison_cost (swap_condition (code))
10123 && (GET_CODE (op1) == REG || !no_new_pseudos))
10124 {
10125 rtx tmp;
10126 tmp = op0, op0 = op1, op1 = tmp;
10127 code = swap_condition (code);
10128 if (GET_CODE (op0) != REG)
10129 op0 = force_reg (op_mode, op0);
10130 }
10131
10132 *pop0 = op0;
10133 *pop1 = op1;
10134 return code;
10135 }
10136
10137 /* Convert a comparison code we use to represent an FP comparison into the
10138 integer code that will result in a proper branch.  Return UNKNOWN if no
10139 such code is available. */
10140
10141 enum rtx_code
10142 ix86_fp_compare_code_to_integer (enum rtx_code code)
10143 {
10144 switch (code)
10145 {
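/* After fcomi (or fnstsw/sahf) the flags look like the result of an
   unsigned comparison, so the FP codes map onto their unsigned
   integer counterparts.  */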
10146 case GT:
10147 return GTU;
10148 case GE:
10149 return GEU;
10150 case ORDERED:
10151 case UNORDERED:
10152 return code;
10153 break;
10154 case UNEQ:
10155 return EQ;
10156 break;
10157 case UNLT:
10158 return LTU;
10159 break;
10160 case UNLE:
10161 return LEU;
10162 break;
10163 case LTGT:
10164 return NE;
10165 break;
10166 default:
10167 return UNKNOWN;
10168 }
10169 }
10170
10171 /* Split comparison code CODE into comparisons we can do using branch
10172 instructions.  BYPASS_CODE is the comparison code for the branch that
10173 will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10174 is not required, its code is set to UNKNOWN.
10175 We never require more than two branches. */
10176
10177 void
10178 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10179 enum rtx_code *first_code,
10180 enum rtx_code *second_code)
10181 {
10182 *first_code = code;
10183 *bypass_code = UNKNOWN;
10184 *second_code = UNKNOWN;
10185
10186 /* The fcomi comparison sets flags as follows:
10187
10188 cmp ZF PF CF
10189 > 0 0 0
10190 < 0 0 1
10191 = 1 0 0
10192 un 1 1 1 */
10193
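/* For example, LT cannot be tested directly under IEEE semantics:
   its UNLT form (CF=1) is also true for unordered operands, so an
   UNORDERED bypass branch is emitted around it.  NE instead needs a
   second UNORDERED branch to the same target, since unordered
   operands must also count as "not equal".  */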
10194 switch (code)
10195 {
10196 case GT: /* GTU - CF=0 & ZF=0 */
10197 case GE: /* GEU - CF=0 */
10198 case ORDERED: /* PF=0 */
10199 case UNORDERED: /* PF=1 */
10200 case UNEQ: /* EQ - ZF=1 */
10201 case UNLT: /* LTU - CF=1 */
10202 case UNLE: /* LEU - CF=1 | ZF=1 */
10203 case LTGT: /* EQ - ZF=0 */
10204 break;
10205 case LT: /* LTU - CF=1 - fails on unordered */
10206 *first_code = UNLT;
10207 *bypass_code = UNORDERED;
10208 break;
10209 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10210 *first_code = UNLE;
10211 *bypass_code = UNORDERED;
10212 break;
10213 case EQ: /* EQ - ZF=1 - fails on unordered */
10214 *first_code = UNEQ;
10215 *bypass_code = UNORDERED;
10216 break;
10217 case NE: /* NE - ZF=0 - fails on unordered */
10218 *first_code = LTGT;
10219 *second_code = UNORDERED;
10220 break;
10221 case UNGE: /* GEU - CF=0 - fails on unordered */
10222 *first_code = GE;
10223 *second_code = UNORDERED;
10224 break;
10225 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10226 *first_code = GT;
10227 *second_code = UNORDERED;
10228 break;
10229 default:
10230 gcc_unreachable ();
10231 }
10232 if (!TARGET_IEEE_FP)
10233 {
10234 *second_code = UNKNOWN;
10235 *bypass_code = UNKNOWN;
10236 }
10237 }
10238
10239 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10240 All of the following functions use the number of instructions as a cost metric.
10241 In the future this should be tweaked to compute bytes for optimize_size and to
10242 take into account the performance of various instructions on various CPUs. */
10243 static int
10244 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10245 {
10246 if (!TARGET_IEEE_FP)
10247 return 4;
10248 /* The cost of code output by ix86_expand_fp_compare. */
10249 switch (code)
10250 {
10251 case UNLE:
10252 case UNLT:
10253 case LTGT:
10254 case GT:
10255 case GE:
10256 case UNORDERED:
10257 case ORDERED:
10258 case UNEQ:
10259 return 4;
10260 break;
10261 case LT:
10262 case NE:
10263 case EQ:
10264 case UNGE:
10265 return 5;
10266 break;
10267 case LE:
10268 case UNGT:
10269 return 6;
10270 break;
10271 default:
10272 gcc_unreachable ();
10273 }
10274 }
10275
10276 /* Return cost of comparison done using fcomi operation.
10277 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10278 static int
10279 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10280 {
10281 enum rtx_code bypass_code, first_code, second_code;
10282 /* Return arbitrarily high cost when instruction is not supported - this
10283 prevents gcc from using it. */
10284 if (!TARGET_CMOVE)
10285 return 1024;
10286 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10287 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10288 }
10289
10290 /* Return cost of comparison done using sahf operation.
10291 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10292 static int
10293 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10294 {
10295 enum rtx_code bypass_code, first_code, second_code;
10296 /* Return an arbitrarily high cost when the instruction is not preferred - this
10297 prevents gcc from using it. */
10298 if (!TARGET_USE_SAHF && !optimize_size)
10299 return 1024;
10300 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10301 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10302 }
10303
10304 /* Compute cost of the comparison done using any method.
10305 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10306 static int
10307 ix86_fp_comparison_cost (enum rtx_code code)
10308 {
10309 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10310 int min;
10311
10312 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10313 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10314
10315 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10316 if (min > sahf_cost)
10317 min = sahf_cost;
10318 if (min > fcomi_cost)
10319 min = fcomi_cost;
10320 return min;
10321 }
10322
10323 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10324
10325 static rtx
10326 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10327 rtx *second_test, rtx *bypass_test)
10328 {
10329 enum machine_mode fpcmp_mode, intcmp_mode;
10330 rtx tmp, tmp2;
10331 int cost = ix86_fp_comparison_cost (code);
10332 enum rtx_code bypass_code, first_code, second_code;
10333
10334 fpcmp_mode = ix86_fp_compare_mode (code);
10335 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10336
10337 if (second_test)
10338 *second_test = NULL_RTX;
10339 if (bypass_test)
10340 *bypass_test = NULL_RTX;
10341
10342 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10343
10344 /* Do fcomi/sahf based test when profitable. */
10345 if ((bypass_code == UNKNOWN || bypass_test)
10346 && (second_code == UNKNOWN || second_test)
10347 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10348 {
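/* With fcomi the comparison sets EFLAGS directly; without cmov-class
   hardware the FPU status word is copied through AX into EFLAGS via
   fnstsw/sahf instead.  */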
10349 if (TARGET_CMOVE)
10350 {
10351 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10352 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10353 tmp);
10354 emit_insn (tmp);
10355 }
10356 else
10357 {
10358 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10359 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10360 if (!scratch)
10361 scratch = gen_reg_rtx (HImode);
10362 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10363 emit_insn (gen_x86_sahf_1 (scratch));
10364 }
10365
10366 /* The FP codes work out to act like unsigned. */
10367 intcmp_mode = fpcmp_mode;
10368 code = first_code;
10369 if (bypass_code != UNKNOWN)
10370 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10371 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10372 const0_rtx);
10373 if (second_code != UNKNOWN)
10374 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10375 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10376 const0_rtx);
10377 }
10378 else
10379 {
10380 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10381 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10382 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10383 if (!scratch)
10384 scratch = gen_reg_rtx (HImode);
10385 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10386
10387 /* In the unordered case, we have to check C2 for NaN's, which
10388 doesn't happen to work out to anything nice combination-wise.
10389 So do some bit twiddling on the value we've got in AH to come
10390 up with an appropriate set of condition codes. */
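/* After fnstsw the relevant condition bits sit in AH as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40; a mask like 0x45 therefore tests all
   three at once.  */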
10391
10392 intcmp_mode = CCNOmode;
10393 switch (code)
10394 {
10395 case GT:
10396 case UNGT:
10397 if (code == GT || !TARGET_IEEE_FP)
10398 {
10399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10400 code = EQ;
10401 }
10402 else
10403 {
10404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10405 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10406 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10407 intcmp_mode = CCmode;
10408 code = GEU;
10409 }
10410 break;
10411 case LT:
10412 case UNLT:
10413 if (code == LT && TARGET_IEEE_FP)
10414 {
10415 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10416 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10417 intcmp_mode = CCmode;
10418 code = EQ;
10419 }
10420 else
10421 {
10422 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10423 code = NE;
10424 }
10425 break;
10426 case GE:
10427 case UNGE:
10428 if (code == GE || !TARGET_IEEE_FP)
10429 {
10430 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10431 code = EQ;
10432 }
10433 else
10434 {
10435 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10436 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10437 GEN_INT (0x01)));
10438 code = NE;
10439 }
10440 break;
10441 case LE:
10442 case UNLE:
10443 if (code == LE && TARGET_IEEE_FP)
10444 {
10445 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10446 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10447 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10448 intcmp_mode = CCmode;
10449 code = LTU;
10450 }
10451 else
10452 {
10453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10454 code = NE;
10455 }
10456 break;
10457 case EQ:
10458 case UNEQ:
10459 if (code == EQ && TARGET_IEEE_FP)
10460 {
10461 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10462 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10463 intcmp_mode = CCmode;
10464 code = EQ;
10465 }
10466 else
10467 {
10468 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10469 code = NE;
10470 break;
10471 }
10472 break;
10473 case NE:
10474 case LTGT:
10475 if (code == NE && TARGET_IEEE_FP)
10476 {
10477 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10478 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10479 GEN_INT (0x40)));
10480 code = NE;
10481 }
10482 else
10483 {
10484 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10485 code = EQ;
10486 }
10487 break;
10488
10489 case UNORDERED:
10490 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10491 code = NE;
10492 break;
10493 case ORDERED:
10494 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10495 code = EQ;
10496 break;
10497
10498 default:
10499 gcc_unreachable ();
10500 }
10501 }
10502
10503 /* Return the test that should be put into the flags user, i.e.
10504 the bcc, scc, or cmov instruction. */
10505 return gen_rtx_fmt_ee (code, VOIDmode,
10506 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10507 const0_rtx);
10508 }
10509
10510 rtx
10511 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10512 {
10513 rtx op0, op1, ret;
10514 op0 = ix86_compare_op0;
10515 op1 = ix86_compare_op1;
10516
10517 if (second_test)
10518 *second_test = NULL_RTX;
10519 if (bypass_test)
10520 *bypass_test = NULL_RTX;
10521
10522 if (ix86_compare_emitted)
10523 {
10524 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10525 ix86_compare_emitted = NULL_RTX;
10526 }
10527 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10528 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10529 second_test, bypass_test);
10530 else
10531 ret = ix86_expand_int_compare (code, op0, op1);
10532
10533 return ret;
10534 }
10535
10536 /* Return true if the CODE will result in a nontrivial jump sequence. */
10537 bool
10538 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10539 {
10540 enum rtx_code bypass_code, first_code, second_code;
10541 if (!TARGET_CMOVE)
10542 return true;
10543 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10544 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10545 }
10546
10547 void
10548 ix86_expand_branch (enum rtx_code code, rtx label)
10549 {
10550 rtx tmp;
10551
10552 /* If we have emitted a compare insn, go straight to simple.
10553 ix86_expand_compare won't emit anything if ix86_compare_emitted
10554 is non-NULL. */
10555 if (ix86_compare_emitted)
10556 goto simple;
10557
10558 switch (GET_MODE (ix86_compare_op0))
10559 {
10560 case QImode:
10561 case HImode:
10562 case SImode:
10563 simple:
10564 tmp = ix86_expand_compare (code, NULL, NULL);
10565 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10566 gen_rtx_LABEL_REF (VOIDmode, label),
10567 pc_rtx);
10568 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10569 return;
10570
10571 case SFmode:
10572 case DFmode:
10573 case XFmode:
10574 {
10575 rtvec vec;
10576 int use_fcomi;
10577 enum rtx_code bypass_code, first_code, second_code;
10578
10579 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10580 &ix86_compare_op1);
10581
10582 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10583
10584 /* Check whether we will use the natural sequence with one jump.  If
10585 so, we can expand the jump early.  Otherwise delay expansion by
10586 creating a compound insn so as not to confuse the optimizers. */
10587 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10588 && TARGET_CMOVE)
10589 {
10590 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10591 gen_rtx_LABEL_REF (VOIDmode, label),
10592 pc_rtx, NULL_RTX, NULL_RTX);
10593 }
10594 else
10595 {
10596 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10597 ix86_compare_op0, ix86_compare_op1);
10598 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10599 gen_rtx_LABEL_REF (VOIDmode, label),
10600 pc_rtx);
10601 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10602
10603 use_fcomi = ix86_use_fcomi_compare (code);
10604 vec = rtvec_alloc (3 + !use_fcomi);
10605 RTVEC_ELT (vec, 0) = tmp;
10606 RTVEC_ELT (vec, 1)
10607 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
10608 RTVEC_ELT (vec, 2)
10609 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
10610 if (! use_fcomi)
10611 RTVEC_ELT (vec, 3)
10612 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10613
10614 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10615 }
10616 return;
10617 }
10618
10619 case DImode:
10620 if (TARGET_64BIT)
10621 goto simple;
10622 case TImode:
10623 /* Expand DImode branch into multiple compare+branch. */
10624 {
10625 rtx lo[2], hi[2], label2;
10626 enum rtx_code code1, code2, code3;
10627 enum machine_mode submode;
10628
10629 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10630 {
10631 tmp = ix86_compare_op0;
10632 ix86_compare_op0 = ix86_compare_op1;
10633 ix86_compare_op1 = tmp;
10634 code = swap_condition (code);
10635 }
10636 if (GET_MODE (ix86_compare_op0) == DImode)
10637 {
10638 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10639 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10640 submode = SImode;
10641 }
10642 else
10643 {
10644 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10645 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10646 submode = DImode;
10647 }
10648
10649 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10650 avoid two branches. This costs one extra insn, so disable when
10651 optimizing for size. */
10652
10653 if ((code == EQ || code == NE)
10654 && (!optimize_size
10655 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10656 {
10657 rtx xor0, xor1;
10658
10659 xor1 = hi[0];
10660 if (hi[1] != const0_rtx)
10661 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10662 NULL_RTX, 0, OPTAB_WIDEN);
10663
10664 xor0 = lo[0];
10665 if (lo[1] != const0_rtx)
10666 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10667 NULL_RTX, 0, OPTAB_WIDEN);
10668
10669 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10670 NULL_RTX, 0, OPTAB_WIDEN);
10671
10672 ix86_compare_op0 = tmp;
10673 ix86_compare_op1 = const0_rtx;
10674 ix86_expand_branch (code, label);
10675 return;
10676 }
10677
10678 /* Otherwise, if we are doing a less-than or greater-or-equal-than
10679 comparison, op1 is a constant, and the low word is zero, then we can
10680 just examine the high word. */
10681
10682 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10683 switch (code)
10684 {
10685 case LT: case LTU: case GE: case GEU:
10686 ix86_compare_op0 = hi[0];
10687 ix86_compare_op1 = hi[1];
10688 ix86_expand_branch (code, label);
10689 return;
10690 default:
10691 break;
10692 }
10693
10694 /* Otherwise, we need two or three jumps. */
10695
10696 label2 = gen_label_rtx ();
10697
10698 code1 = code;
10699 code2 = swap_condition (code);
10700 code3 = unsigned_condition (code);
10701
10702 switch (code)
10703 {
10704 case LT: case GT: case LTU: case GTU:
10705 break;
10706
10707 case LE: code1 = LT; code2 = GT; break;
10708 case GE: code1 = GT; code2 = LT; break;
10709 case LEU: code1 = LTU; code2 = GTU; break;
10710 case GEU: code1 = GTU; code2 = LTU; break;
10711
10712 case EQ: code1 = UNKNOWN; code2 = NE; break;
10713 case NE: code2 = UNKNOWN; break;
10714
10715 default:
10716 gcc_unreachable ();
10717 }
10718
10719 /*
10720 * a < b =>
10721 * if (hi(a) < hi(b)) goto true;
10722 * if (hi(a) > hi(b)) goto false;
10723 * if (lo(a) < lo(b)) goto true;
10724 * false:
10725 */
10726
10727 ix86_compare_op0 = hi[0];
10728 ix86_compare_op1 = hi[1];
10729
10730 if (code1 != UNKNOWN)
10731 ix86_expand_branch (code1, label);
10732 if (code2 != UNKNOWN)
10733 ix86_expand_branch (code2, label2);
10734
10735 ix86_compare_op0 = lo[0];
10736 ix86_compare_op1 = lo[1];
10737 ix86_expand_branch (code3, label);
10738
10739 if (code2 != UNKNOWN)
10740 emit_label (label2);
10741 return;
10742 }
10743
10744 default:
10745 gcc_unreachable ();
10746 }
10747 }
10748
10749 /* Split branch based on floating point condition. */
10750 void
10751 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10752 rtx target1, rtx target2, rtx tmp, rtx pushed)
10753 {
10754 rtx second, bypass;
10755 rtx label = NULL_RTX;
10756 rtx condition;
10757 int bypass_probability = -1, second_probability = -1, probability = -1;
10758 rtx i;
10759
10760 if (target2 != pc_rtx)
10761 {
10762 rtx tmp = target2;
10763 code = reverse_condition_maybe_unordered (code);
10764 target2 = target1;
10765 target1 = tmp;
10766 }
10767
10768 condition = ix86_expand_fp_compare (code, op1, op2,
10769 tmp, &second, &bypass);
10770
10771 /* Remove pushed operand from stack. */
10772 if (pushed)
10773 ix86_free_from_memory (GET_MODE (pushed));
10774
10775 if (split_branch_probability >= 0)
10776 {
10777 /* Distribute the probabilities across the jumps.
10778 Assume that BYPASS and SECOND always test
10779 for UNORDERED. */
10780 probability = split_branch_probability;
10781
10782 /* A value of 1 is low enough that there is no need for the probability
10783 to be updated. Later we may run some experiments and see
10784 if unordered values are more frequent in practice. */
10785 if (bypass)
10786 bypass_probability = 1;
10787 if (second)
10788 second_probability = 1;
10789 }
10790 if (bypass != NULL_RTX)
10791 {
10792 label = gen_label_rtx ();
10793 i = emit_jump_insn (gen_rtx_SET
10794 (VOIDmode, pc_rtx,
10795 gen_rtx_IF_THEN_ELSE (VOIDmode,
10796 bypass,
10797 gen_rtx_LABEL_REF (VOIDmode,
10798 label),
10799 pc_rtx)));
10800 if (bypass_probability >= 0)
10801 REG_NOTES (i)
10802 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10803 GEN_INT (bypass_probability),
10804 REG_NOTES (i));
10805 }
10806 i = emit_jump_insn (gen_rtx_SET
10807 (VOIDmode, pc_rtx,
10808 gen_rtx_IF_THEN_ELSE (VOIDmode,
10809 condition, target1, target2)));
10810 if (probability >= 0)
10811 REG_NOTES (i)
10812 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10813 GEN_INT (probability),
10814 REG_NOTES (i));
10815 if (second != NULL_RTX)
10816 {
10817 i = emit_jump_insn (gen_rtx_SET
10818 (VOIDmode, pc_rtx,
10819 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10820 target2)));
10821 if (second_probability >= 0)
10822 REG_NOTES (i)
10823 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10824 GEN_INT (second_probability),
10825 REG_NOTES (i));
10826 }
10827 if (label != NULL_RTX)
10828 emit_label (label);
10829 }
10830
10831 int
10832 ix86_expand_setcc (enum rtx_code code, rtx dest)
10833 {
10834 rtx ret, tmp, tmpreg, equiv;
10835 rtx second_test, bypass_test;
10836
10837 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10838 return 0; /* FAIL */
10839
10840 gcc_assert (GET_MODE (dest) == QImode);
10841
10842 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10843 PUT_MODE (ret, QImode);
10844
10845 tmp = dest;
10846 tmpreg = dest;
10847
10848 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10849 if (bypass_test || second_test)
10850 {
10851 rtx test = second_test;
10852 int bypass = 0;
10853 rtx tmp2 = gen_reg_rtx (QImode);
10854 if (bypass_test)
10855 {
10856 gcc_assert (!second_test);
10857 test = bypass_test;
10858 bypass = 1;
10859 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10860 }
10861 PUT_MODE (test, QImode);
10862 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10863
10864 if (bypass)
10865 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10866 else
10867 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10868 }
10869
10870 /* Attach a REG_EQUAL note describing the comparison result. */
10871 if (ix86_compare_op0 && ix86_compare_op1)
10872 {
10873 equiv = simplify_gen_relational (code, QImode,
10874 GET_MODE (ix86_compare_op0),
10875 ix86_compare_op0, ix86_compare_op1);
10876 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10877 }
10878
10879 return 1; /* DONE */
10880 }
10881
10882 /* Expand a comparison setting or clearing the carry flag. Return true when
10883 successful and set *POP to the resulting comparison operation. */
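/* For instance, an unsigned test such as a < b maps directly onto the carry
   flag: "cmp a, b" sets CF exactly when a < b as unsigned values.  The
   conversions below rewrite other comparisons into LTU/GEU form, e.g.
   a == 0 becomes (unsigned) a < 1 and a >= 0 becomes
   (unsigned) a < 0x80000000, so the same carry-based tricks apply.  */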
10884 static bool
10885 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10886 {
10887 enum machine_mode mode =
10888 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10889
10890 /* Do not handle DImode compares that go through the special path. FP
10891 compares are handled below only when they reduce to a carry flag test. */
10892 if (mode == (TARGET_64BIT ? TImode : DImode))
10893 return false;
10894 if (FLOAT_MODE_P (mode))
10895 {
10896 rtx second_test = NULL, bypass_test = NULL;
10897 rtx compare_op, compare_seq;
10898
10899 /* Shortcut: following common codes never translate into carry flag compares. */
10900 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10901 || code == ORDERED || code == UNORDERED)
10902 return false;
10903
10904 /* These comparisons require zero flag; swap operands so they won't. */
10905 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10906 && !TARGET_IEEE_FP)
10907 {
10908 rtx tmp = op0;
10909 op0 = op1;
10910 op1 = tmp;
10911 code = swap_condition (code);
10912 }
10913
10914 /* Try to expand the comparison and verify that we end up with a carry flag
10915 based comparison. This fails to be true only when we decide to expand the
10916 comparison using arithmetic, which is not a common scenario. */
10917 start_sequence ();
10918 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10919 &second_test, &bypass_test);
10920 compare_seq = get_insns ();
10921 end_sequence ();
10922
10923 if (second_test || bypass_test)
10924 return false;
10925 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10926 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10927 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10928 else
10929 code = GET_CODE (compare_op);
10930 if (code != LTU && code != GEU)
10931 return false;
10932 emit_insn (compare_seq);
10933 *pop = compare_op;
10934 return true;
10935 }
10936 if (!INTEGRAL_MODE_P (mode))
10937 return false;
10938 switch (code)
10939 {
10940 case LTU:
10941 case GEU:
10942 break;
10943
10944 /* Convert a==0 into (unsigned)a<1. */
10945 case EQ:
10946 case NE:
10947 if (op1 != const0_rtx)
10948 return false;
10949 op1 = const1_rtx;
10950 code = (code == EQ ? LTU : GEU);
10951 break;
10952
10953 /* Convert a>b into b<a or a>=b-1. */
10954 case GTU:
10955 case LEU:
10956 if (GET_CODE (op1) == CONST_INT)
10957 {
10958 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10959 /* Bail out on overflow. We could still swap the operands, but that
10960 would force loading of the constant into a register. */
10961 if (op1 == const0_rtx
10962 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10963 return false;
10964 code = (code == GTU ? GEU : LTU);
10965 }
10966 else
10967 {
10968 rtx tmp = op1;
10969 op1 = op0;
10970 op0 = tmp;
10971 code = (code == GTU ? LTU : GEU);
10972 }
10973 break;
10974
10975 /* Convert a>=0 into (unsigned)a<0x80000000. */
10976 case LT:
10977 case GE:
10978 if (mode == DImode || op1 != const0_rtx)
10979 return false;
10980 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10981 code = (code == LT ? GEU : LTU);
10982 break;
10983 case LE:
10984 case GT:
10985 if (mode == DImode || op1 != constm1_rtx)
10986 return false;
10987 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10988 code = (code == LE ? GEU : LTU);
10989 break;
10990
10991 default:
10992 return false;
10993 }
10994 /* Swapping operands may cause a constant to appear as the first operand. */
10995 if (!nonimmediate_operand (op0, VOIDmode))
10996 {
10997 if (no_new_pseudos)
10998 return false;
10999 op0 = force_reg (mode, op0);
11000 }
11001 ix86_compare_op0 = op0;
11002 ix86_compare_op1 = op1;
11003 *pop = ix86_expand_compare (code, NULL, NULL);
11004 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11005 return true;
11006 }
11007
11008 int
11009 ix86_expand_int_movcc (rtx operands[])
11010 {
11011 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11012 rtx compare_seq, compare_op;
11013 rtx second_test, bypass_test;
11014 enum machine_mode mode = GET_MODE (operands[0]);
11015 bool sign_bit_compare_p = false;
11016
11017 start_sequence ();
11018 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11019 compare_seq = get_insns ();
11020 end_sequence ();
11021
11022 compare_code = GET_CODE (compare_op);
11023
11024 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11025 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11026 sign_bit_compare_p = true;
11027
11028 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11029 HImode insns, we'd be swallowed in word prefix ops. */
11030
11031 if ((mode != HImode || TARGET_FAST_PREFIX)
11032 && (mode != (TARGET_64BIT ? TImode : DImode))
11033 && GET_CODE (operands[2]) == CONST_INT
11034 && GET_CODE (operands[3]) == CONST_INT)
11035 {
11036 rtx out = operands[0];
11037 HOST_WIDE_INT ct = INTVAL (operands[2]);
11038 HOST_WIDE_INT cf = INTVAL (operands[3]);
11039 HOST_WIDE_INT diff;
11040
11041 diff = ct - cf;
11042 /* Sign bit compares are better done using shifts than by using
11043 sbb. */
11044 if (sign_bit_compare_p
11045 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11046 ix86_compare_op1, &compare_op))
11047 {
11048 /* Detect overlap between destination and compare sources. */
11049 rtx tmp = out;
11050
11051 if (!sign_bit_compare_p)
11052 {
11053 bool fpcmp = false;
11054
11055 compare_code = GET_CODE (compare_op);
11056
11057 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11058 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11059 {
11060 fpcmp = true;
11061 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11062 }
11063
11064 /* To simplify rest of code, restrict to the GEU case. */
11065 if (compare_code == LTU)
11066 {
11067 HOST_WIDE_INT tmp = ct;
11068 ct = cf;
11069 cf = tmp;
11070 compare_code = reverse_condition (compare_code);
11071 code = reverse_condition (code);
11072 }
11073 else
11074 {
11075 if (fpcmp)
11076 PUT_CODE (compare_op,
11077 reverse_condition_maybe_unordered
11078 (GET_CODE (compare_op)));
11079 else
11080 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11081 }
11082 diff = ct - cf;
11083
11084 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11085 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11086 tmp = gen_reg_rtx (mode);
11087
11088 if (mode == DImode)
11089 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11090 else
11091 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11092 }
11093 else
11094 {
11095 if (code == GT || code == GE)
11096 code = reverse_condition (code);
11097 else
11098 {
11099 HOST_WIDE_INT tmp = ct;
11100 ct = cf;
11101 cf = tmp;
11102 diff = ct - cf;
11103 }
11104 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11105 ix86_compare_op1, VOIDmode, 0, -1);
11106 }
11107
11108 if (diff == 1)
11109 {
11110 /*
11111 * cmpl op0,op1
11112 * sbbl dest,dest
11113 * [addl dest, ct]
11114 *
11115 * Size 5 - 8.
11116 */
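/* Concretely: tmp holds 0 or -1 here, so with diff == 1 (i.e. ct == cf + 1)
   the optional addition of ct turns that into ct or cf respectively,
   without any branch.  */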
11117 if (ct)
11118 tmp = expand_simple_binop (mode, PLUS,
11119 tmp, GEN_INT (ct),
11120 copy_rtx (tmp), 1, OPTAB_DIRECT);
11121 }
11122 else if (cf == -1)
11123 {
11124 /*
11125 * cmpl op0,op1
11126 * sbbl dest,dest
11127 * orl $ct, dest
11128 *
11129 * Size 8.
11130 */
11131 tmp = expand_simple_binop (mode, IOR,
11132 tmp, GEN_INT (ct),
11133 copy_rtx (tmp), 1, OPTAB_DIRECT);
11134 }
11135 else if (diff == -1 && ct)
11136 {
11137 /*
11138 * cmpl op0,op1
11139 * sbbl dest,dest
11140 * notl dest
11141 * [addl dest, cf]
11142 *
11143 * Size 8 - 11.
11144 */
11145 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11146 if (cf)
11147 tmp = expand_simple_binop (mode, PLUS,
11148 copy_rtx (tmp), GEN_INT (cf),
11149 copy_rtx (tmp), 1, OPTAB_DIRECT);
11150 }
11151 else
11152 {
11153 /*
11154 * cmpl op0,op1
11155 * sbbl dest,dest
11156 * [notl dest]
11157 * andl cf - ct, dest
11158 * [addl dest, ct]
11159 *
11160 * Size 8 - 11.
11161 */
11162
11163 if (cf == 0)
11164 {
11165 cf = ct;
11166 ct = 0;
11167 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11168 }
11169
11170 tmp = expand_simple_binop (mode, AND,
11171 copy_rtx (tmp),
11172 gen_int_mode (cf - ct, mode),
11173 copy_rtx (tmp), 1, OPTAB_DIRECT);
11174 if (ct)
11175 tmp = expand_simple_binop (mode, PLUS,
11176 copy_rtx (tmp), GEN_INT (ct),
11177 copy_rtx (tmp), 1, OPTAB_DIRECT);
11178 }
11179
11180 if (!rtx_equal_p (tmp, out))
11181 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11182
11183 return 1; /* DONE */
11184 }
11185
11186 if (diff < 0)
11187 {
11188 HOST_WIDE_INT tmp;
11189 tmp = ct, ct = cf, cf = tmp;
11190 diff = -diff;
11191 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11192 {
11193 /* We may be reversing an unordered compare to a normal compare, which
11194 is not valid in general (we may convert a non-trapping condition
11195 to a trapping one); however, on i386 we currently emit all
11196 comparisons unordered. */
11197 compare_code = reverse_condition_maybe_unordered (compare_code);
11198 code = reverse_condition_maybe_unordered (code);
11199 }
11200 else
11201 {
11202 compare_code = reverse_condition (compare_code);
11203 code = reverse_condition (code);
11204 }
11205 }
11206
11207 compare_code = UNKNOWN;
11208 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11209 && GET_CODE (ix86_compare_op1) == CONST_INT)
11210 {
11211 if (ix86_compare_op1 == const0_rtx
11212 && (code == LT || code == GE))
11213 compare_code = code;
11214 else if (ix86_compare_op1 == constm1_rtx)
11215 {
11216 if (code == LE)
11217 compare_code = LT;
11218 else if (code == GT)
11219 compare_code = GE;
11220 }
11221 }
11222
11223 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11224 if (compare_code != UNKNOWN
11225 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11226 && (cf == -1 || ct == -1))
11227 {
11228 /* If lea code below could be used, only optimize
11229 if it results in a 2 insn sequence. */
11230
11231 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11232 || diff == 3 || diff == 5 || diff == 9)
11233 || (compare_code == LT && ct == -1)
11234 || (compare_code == GE && cf == -1))
11235 {
11236 /*
11237 * notl op1 (if necessary)
11238 * sarl $31, op1
11239 * orl cf, op1
11240 */
11241 if (ct != -1)
11242 {
11243 cf = ct;
11244 ct = -1;
11245 code = reverse_condition (code);
11246 }
11247
11248 out = emit_store_flag (out, code, ix86_compare_op0,
11249 ix86_compare_op1, VOIDmode, 0, -1);
11250
11251 out = expand_simple_binop (mode, IOR,
11252 out, GEN_INT (cf),
11253 out, 1, OPTAB_DIRECT);
11254 if (out != operands[0])
11255 emit_move_insn (operands[0], out);
11256
11257 return 1; /* DONE */
11258 }
11259 }
11260
11261
11262 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11263 || diff == 3 || diff == 5 || diff == 9)
11264 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11265 && (mode != DImode
11266 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11267 {
11268 /*
11269 * xorl dest,dest
11270 * cmpl op1,op2
11271 * setcc dest
11272 * lea cf(dest*(ct-cf)),dest
11273 *
11274 * Size 14.
11275 *
11276 * This also catches the degenerate setcc-only case.
11277 */
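/* E.g. for ct = 5, cf = 2 (diff == 3) this ends up as roughly
   "setcc %al; lea 2(%eax,%eax,2), %eax", i.e. dest = cf + dest * diff,
   giving 2 when the condition is false and 5 when it is true.  */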
11278
11279 rtx tmp;
11280 int nops;
11281
11282 out = emit_store_flag (out, code, ix86_compare_op0,
11283 ix86_compare_op1, VOIDmode, 0, 1);
11284
11285 nops = 0;
11286 /* On x86_64 the lea instruction operates on Pmode, so we need
11287 to get the arithmetic done in the proper mode to match. */
11288 if (diff == 1)
11289 tmp = copy_rtx (out);
11290 else
11291 {
11292 rtx out1;
11293 out1 = copy_rtx (out);
11294 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11295 nops++;
11296 if (diff & 1)
11297 {
11298 tmp = gen_rtx_PLUS (mode, tmp, out1);
11299 nops++;
11300 }
11301 }
11302 if (cf != 0)
11303 {
11304 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11305 nops++;
11306 }
11307 if (!rtx_equal_p (tmp, out))
11308 {
11309 if (nops == 1)
11310 out = force_operand (tmp, copy_rtx (out));
11311 else
11312 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11313 }
11314 if (!rtx_equal_p (out, operands[0]))
11315 emit_move_insn (operands[0], copy_rtx (out));
11316
11317 return 1; /* DONE */
11318 }
11319
11320 /*
11321 * General case: Jumpful:
11322 * xorl dest,dest cmpl op1, op2
11323 * cmpl op1, op2 movl ct, dest
11324 * setcc dest jcc 1f
11325 * decl dest movl cf, dest
11326 * andl (cf-ct),dest 1:
11327 * addl ct,dest
11328 *
11329 * Size 20. Size 14.
11330 *
11331 * This is reasonably steep, but branch mispredict costs are
11332 * high on modern cpus, so consider failing only if optimizing
11333 * for space.
11334 */
11335
11336 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11337 && BRANCH_COST >= 2)
11338 {
11339 if (cf == 0)
11340 {
11341 cf = ct;
11342 ct = 0;
11343 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11344 /* We may be reversing an unordered compare to a normal compare,
11345 which is not valid in general (we may convert a non-trapping
11346 condition to a trapping one); however, on i386 we currently
11347 emit all comparisons unordered. */
11348 code = reverse_condition_maybe_unordered (code);
11349 else
11350 {
11351 code = reverse_condition (code);
11352 if (compare_code != UNKNOWN)
11353 compare_code = reverse_condition (compare_code);
11354 }
11355 }
11356
11357 if (compare_code != UNKNOWN)
11358 {
11359 /* notl op1 (if needed)
11360 sarl $31, op1
11361 andl (cf-ct), op1
11362 addl ct, op1
11363
11364 For x < 0 (resp. x <= -1) there will be no notl,
11365 so if possible swap the constants to get rid of the
11366 complement.
11367 True/false will be -1/0 while code below (store flag
11368 followed by decrement) is 0/-1, so the constants need
11369 to be exchanged once more. */
11370
11371 if (compare_code == GE || !cf)
11372 {
11373 code = reverse_condition (code);
11374 compare_code = LT;
11375 }
11376 else
11377 {
11378 HOST_WIDE_INT tmp = cf;
11379 cf = ct;
11380 ct = tmp;
11381 }
11382
11383 out = emit_store_flag (out, code, ix86_compare_op0,
11384 ix86_compare_op1, VOIDmode, 0, -1);
11385 }
11386 else
11387 {
11388 out = emit_store_flag (out, code, ix86_compare_op0,
11389 ix86_compare_op1, VOIDmode, 0, 1);
11390
11391 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11392 copy_rtx (out), 1, OPTAB_DIRECT);
11393 }
11394
11395 out = expand_simple_binop (mode, AND, copy_rtx (out),
11396 gen_int_mode (cf - ct, mode),
11397 copy_rtx (out), 1, OPTAB_DIRECT);
11398 if (ct)
11399 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11400 copy_rtx (out), 1, OPTAB_DIRECT);
11401 if (!rtx_equal_p (out, operands[0]))
11402 emit_move_insn (operands[0], copy_rtx (out));
11403
11404 return 1; /* DONE */
11405 }
11406 }
11407
11408 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11409 {
11410 /* Try a few things more with specific constants and a variable. */
11411
11412 optab op;
11413 rtx var, orig_out, out, tmp;
11414
11415 if (BRANCH_COST <= 2)
11416 return 0; /* FAIL */
11417
11418 /* If one of the two operands is an interesting constant, load a
11419 constant with the above and mask it in with a logical operation. */
11420
11421 if (GET_CODE (operands[2]) == CONST_INT)
11422 {
11423 var = operands[3];
11424 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11425 operands[3] = constm1_rtx, op = and_optab;
11426 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11427 operands[3] = const0_rtx, op = ior_optab;
11428 else
11429 return 0; /* FAIL */
11430 }
11431 else if (GET_CODE (operands[3]) == CONST_INT)
11432 {
11433 var = operands[2];
11434 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11435 operands[2] = constm1_rtx, op = and_optab;
11436 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11437 operands[2] = const0_rtx, op = ior_optab;
11438 else
11439 return 0; /* FAIL */
11440 }
11441 else
11442 return 0; /* FAIL */
11443
11444 orig_out = operands[0];
11445 tmp = gen_reg_rtx (mode);
11446 operands[0] = tmp;
11447
11448 /* Recurse to get the constant loaded. */
11449 if (ix86_expand_int_movcc (operands) == 0)
11450 return 0; /* FAIL */
11451
11452 /* Mask in the interesting variable. */
11453 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11454 OPTAB_WIDEN);
11455 if (!rtx_equal_p (out, orig_out))
11456 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11457
11458 return 1; /* DONE */
11459 }
11460
11461 /*
11462 * For comparison with above,
11463 *
11464 * movl cf,dest
11465 * movl ct,tmp
11466 * cmpl op1,op2
11467 * cmovcc tmp,dest
11468 *
11469 * Size 15.
11470 */
11471
11472 if (! nonimmediate_operand (operands[2], mode))
11473 operands[2] = force_reg (mode, operands[2]);
11474 if (! nonimmediate_operand (operands[3], mode))
11475 operands[3] = force_reg (mode, operands[3]);
11476
11477 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11478 {
11479 rtx tmp = gen_reg_rtx (mode);
11480 emit_move_insn (tmp, operands[3]);
11481 operands[3] = tmp;
11482 }
11483 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11484 {
11485 rtx tmp = gen_reg_rtx (mode);
11486 emit_move_insn (tmp, operands[2]);
11487 operands[2] = tmp;
11488 }
11489
11490 if (! register_operand (operands[2], VOIDmode)
11491 && (mode == QImode
11492 || ! register_operand (operands[3], VOIDmode)))
11493 operands[2] = force_reg (mode, operands[2]);
11494
11495 if (mode == QImode
11496 && ! register_operand (operands[3], VOIDmode))
11497 operands[3] = force_reg (mode, operands[3]);
11498
11499 emit_insn (compare_seq);
11500 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11501 gen_rtx_IF_THEN_ELSE (mode,
11502 compare_op, operands[2],
11503 operands[3])));
11504 if (bypass_test)
11505 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11506 gen_rtx_IF_THEN_ELSE (mode,
11507 bypass_test,
11508 copy_rtx (operands[3]),
11509 copy_rtx (operands[0]))));
11510 if (second_test)
11511 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11512 gen_rtx_IF_THEN_ELSE (mode,
11513 second_test,
11514 copy_rtx (operands[2]),
11515 copy_rtx (operands[0]))));
11516
11517 return 1; /* DONE */
11518 }
11519
11520 /* Swap, force into registers, or otherwise massage the two operands
11521 to an sse comparison with a mask result. Thus we differ a bit from
11522 ix86_prepare_fp_compare_args which expects to produce a flags result.
11523
11524 The DEST operand exists to help determine whether to commute commutative
11525 operators. The POP0/POP1 operands are updated in place. The new
11526 comparison code is returned, or UNKNOWN if not implementable. */
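/* For example, GE/GT/UNLE/UNLT are not directly available as SSE compare
   predicates, so a > b is rewritten below as b < a by swapping *POP0 and
   *POP1 and returning the swapped code.  */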
11527
11528 static enum rtx_code
11529 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11530 rtx *pop0, rtx *pop1)
11531 {
11532 rtx tmp;
11533
11534 switch (code)
11535 {
11536 case LTGT:
11537 case UNEQ:
11538 /* We have no LTGT as an operator. We could implement it with
11539 NE & ORDERED, but this requires an extra temporary. It's
11540 not clear that it's worth it. */
11541 return UNKNOWN;
11542
11543 case LT:
11544 case LE:
11545 case UNGT:
11546 case UNGE:
11547 /* These are supported directly. */
11548 break;
11549
11550 case EQ:
11551 case NE:
11552 case UNORDERED:
11553 case ORDERED:
11554 /* For commutative operators, try to canonicalize the destination
11555 operand to be first in the comparison - this helps reload to
11556 avoid extra moves. */
11557 if (!dest || !rtx_equal_p (dest, *pop1))
11558 break;
11559 /* FALLTHRU */
11560
11561 case GE:
11562 case GT:
11563 case UNLE:
11564 case UNLT:
11565 /* These are not supported directly. Swap the comparison operands
11566 to transform into something that is supported. */
11567 tmp = *pop0;
11568 *pop0 = *pop1;
11569 *pop1 = tmp;
11570 code = swap_condition (code);
11571 break;
11572
11573 default:
11574 gcc_unreachable ();
11575 }
11576
11577 return code;
11578 }
11579
11580 /* Detect conditional moves that exactly match min/max operational
11581 semantics. Note that this is IEEE safe, as long as we don't
11582 interchange the operands.
11583
11584 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11585 and TRUE if the operation is successful and instructions are emitted. */
11586
11587 static bool
11588 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11589 rtx cmp_op1, rtx if_true, rtx if_false)
11590 {
11591 enum machine_mode mode;
11592 bool is_min;
11593 rtx tmp;
11594
11595 if (code == LT)
11596 ;
11597 else if (code == UNGE)
11598 {
11599 tmp = if_true;
11600 if_true = if_false;
11601 if_false = tmp;
11602 }
11603 else
11604 return false;
11605
11606 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11607 is_min = true;
11608 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11609 is_min = false;
11610 else
11611 return false;
11612
11613 mode = GET_MODE (dest);
11614
11615 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11616 but MODE may be a vector mode and thus not appropriate. */
11617 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11618 {
11619 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11620 rtvec v;
11621
11622 if_true = force_reg (mode, if_true);
11623 v = gen_rtvec (2, if_true, if_false);
11624 tmp = gen_rtx_UNSPEC (mode, v, u);
11625 }
11626 else
11627 {
11628 code = is_min ? SMIN : SMAX;
11629 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11630 }
11631
11632 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11633 return true;
11634 }
11635
11636 /* Expand an sse vector comparison. Return the register with the result. */
11637
11638 static rtx
11639 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11640 rtx op_true, rtx op_false)
11641 {
11642 enum machine_mode mode = GET_MODE (dest);
11643 rtx x;
11644
11645 cmp_op0 = force_reg (mode, cmp_op0);
11646 if (!nonimmediate_operand (cmp_op1, mode))
11647 cmp_op1 = force_reg (mode, cmp_op1);
11648
11649 if (optimize
11650 || reg_overlap_mentioned_p (dest, op_true)
11651 || reg_overlap_mentioned_p (dest, op_false))
11652 dest = gen_reg_rtx (mode);
11653
11654 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11655 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11656
11657 return dest;
11658 }
11659
11660 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11661 operations. This is used for both scalar and vector conditional moves. */
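/* In the general case below this computes, elementwise,
   dest = (cmp & op_true) | (~cmp & op_false), relying on CMP being an
   all-ones or all-zeros mask in each element.  */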
11662
11663 static void
11664 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11665 {
11666 enum machine_mode mode = GET_MODE (dest);
11667 rtx t2, t3, x;
11668
11669 if (op_false == CONST0_RTX (mode))
11670 {
11671 op_true = force_reg (mode, op_true);
11672 x = gen_rtx_AND (mode, cmp, op_true);
11673 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11674 }
11675 else if (op_true == CONST0_RTX (mode))
11676 {
11677 op_false = force_reg (mode, op_false);
11678 x = gen_rtx_NOT (mode, cmp);
11679 x = gen_rtx_AND (mode, x, op_false);
11680 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11681 }
11682 else
11683 {
11684 op_true = force_reg (mode, op_true);
11685 op_false = force_reg (mode, op_false);
11686
11687 t2 = gen_reg_rtx (mode);
11688 if (optimize)
11689 t3 = gen_reg_rtx (mode);
11690 else
11691 t3 = dest;
11692
11693 x = gen_rtx_AND (mode, op_true, cmp);
11694 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11695
11696 x = gen_rtx_NOT (mode, cmp);
11697 x = gen_rtx_AND (mode, x, op_false);
11698 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11699
11700 x = gen_rtx_IOR (mode, t3, t2);
11701 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11702 }
11703 }
11704
11705 /* Expand a floating-point conditional move. Return true if successful. */
11706
11707 int
11708 ix86_expand_fp_movcc (rtx operands[])
11709 {
11710 enum machine_mode mode = GET_MODE (operands[0]);
11711 enum rtx_code code = GET_CODE (operands[1]);
11712 rtx tmp, compare_op, second_test, bypass_test;
11713
11714 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11715 {
11716 enum machine_mode cmode;
11717
11718 /* Since we have no cmove for sse registers, don't force bad register
11719 allocation just to gain access to it. Deny movcc when the
11720 comparison mode doesn't match the move mode. */
11721 cmode = GET_MODE (ix86_compare_op0);
11722 if (cmode == VOIDmode)
11723 cmode = GET_MODE (ix86_compare_op1);
11724 if (cmode != mode)
11725 return 0;
11726
11727 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11728 &ix86_compare_op0,
11729 &ix86_compare_op1);
11730 if (code == UNKNOWN)
11731 return 0;
11732
11733 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11734 ix86_compare_op1, operands[2],
11735 operands[3]))
11736 return 1;
11737
11738 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11739 ix86_compare_op1, operands[2], operands[3]);
11740 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11741 return 1;
11742 }
11743
11744 /* The floating point conditional move instructions don't directly
11745 support conditions resulting from a signed integer comparison. */
11746
11747 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11748
11752 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11753 {
11754 gcc_assert (!second_test && !bypass_test);
11755 tmp = gen_reg_rtx (QImode);
11756 ix86_expand_setcc (code, tmp);
11757 code = NE;
11758 ix86_compare_op0 = tmp;
11759 ix86_compare_op1 = const0_rtx;
11760 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11761 }
11762 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11763 {
11764 tmp = gen_reg_rtx (mode);
11765 emit_move_insn (tmp, operands[3]);
11766 operands[3] = tmp;
11767 }
11768 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11769 {
11770 tmp = gen_reg_rtx (mode);
11771 emit_move_insn (tmp, operands[2]);
11772 operands[2] = tmp;
11773 }
11774
11775 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11776 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11777 operands[2], operands[3])));
11778 if (bypass_test)
11779 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11780 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11781 operands[3], operands[0])));
11782 if (second_test)
11783 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11784 gen_rtx_IF_THEN_ELSE (mode, second_test,
11785 operands[2], operands[0])));
11786
11787 return 1;
11788 }
11789
11790 /* Expand a floating-point vector conditional move; a vcond operation
11791 rather than a movcc operation. */
11792
11793 bool
11794 ix86_expand_fp_vcond (rtx operands[])
11795 {
11796 enum rtx_code code = GET_CODE (operands[3]);
11797 rtx cmp;
11798
11799 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11800 &operands[4], &operands[5]);
11801 if (code == UNKNOWN)
11802 return false;
11803
11804 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11805 operands[5], operands[1], operands[2]))
11806 return true;
11807
11808 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11809 operands[1], operands[2]);
11810 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11811 return true;
11812 }
11813
11814 /* Expand a signed integral vector conditional move. */
11815
11816 bool
11817 ix86_expand_int_vcond (rtx operands[])
11818 {
11819 enum machine_mode mode = GET_MODE (operands[0]);
11820 enum rtx_code code = GET_CODE (operands[3]);
11821 bool negate = false;
11822 rtx x, cop0, cop1;
11823
11824 cop0 = operands[4];
11825 cop1 = operands[5];
11826
11827 /* Canonicalize the comparison to EQ, GT, GTU. */
11828 switch (code)
11829 {
11830 case EQ:
11831 case GT:
11832 case GTU:
11833 break;
11834
11835 case NE:
11836 case LE:
11837 case LEU:
11838 code = reverse_condition (code);
11839 negate = true;
11840 break;
11841
11842 case GE:
11843 case GEU:
11844 code = reverse_condition (code);
11845 negate = true;
11846 /* FALLTHRU */
11847
11848 case LT:
11849 case LTU:
11850 code = swap_condition (code);
11851 x = cop0, cop0 = cop1, cop1 = x;
11852 break;
11853
11854 default:
11855 gcc_unreachable ();
11856 }
11857
11858 /* Unsigned parallel compare is not supported by the hardware. Play some
11859 tricks to turn this into a signed comparison against 0. */
11860 if (code == GTU)
11861 {
11862 cop0 = force_reg (mode, cop0);
11863
11864 switch (mode)
11865 {
11866 case V4SImode:
11867 {
11868 rtx t1, t2, mask;
11869
11870 /* Perform a parallel modulo subtraction. */
11871 t1 = gen_reg_rtx (mode);
11872 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11873
11874 /* Extract the original sign bit of op0. */
11875 mask = GEN_INT (-0x80000000);
11876 mask = gen_rtx_CONST_VECTOR (mode,
11877 gen_rtvec (4, mask, mask, mask, mask));
11878 mask = force_reg (mode, mask);
11879 t2 = gen_reg_rtx (mode);
11880 emit_insn (gen_andv4si3 (t2, cop0, mask));
11881
11882 /* XOR it back into the result of the subtraction. This results
11883 in the sign bit set iff we saw unsigned underflow. */
11884 x = gen_reg_rtx (mode);
11885 emit_insn (gen_xorv4si3 (x, t1, t2));
11886
11887 code = GT;
11888 }
11889 break;
11890
11891 case V16QImode:
11892 case V8HImode:
11893 /* Perform a parallel unsigned saturating subtraction. */
11894 x = gen_reg_rtx (mode);
11895 emit_insn (gen_rtx_SET (VOIDmode, x,
11896 gen_rtx_US_MINUS (mode, cop0, cop1)));
11897
11898 code = EQ;
11899 negate = !negate;
11900 break;
11901
11902 default:
11903 gcc_unreachable ();
11904 }
11905
11906 cop0 = x;
11907 cop1 = CONST0_RTX (mode);
11908 }
11909
11910 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11911 operands[1+negate], operands[2-negate]);
11912
11913 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11914 operands[2-negate]);
11915 return true;
11916 }
11917
11918 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11919 true if we should do zero extension, else sign extension. HIGH_P is
11920 true if we want the N/2 high elements, else the low elements. */
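/* Roughly speaking: the interleave instruction pairs each element of OP[1]
   with a matching extension element - a zero vector for zero extension, or
   a GT-comparison mask (all ones where the source element is negative) for
   sign extension - so that each result element becomes the widened value.  */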
11921
11922 void
11923 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11924 {
11925 enum machine_mode imode = GET_MODE (operands[1]);
11926 rtx (*unpack)(rtx, rtx, rtx);
11927 rtx se, dest;
11928
11929 switch (imode)
11930 {
11931 case V16QImode:
11932 if (high_p)
11933 unpack = gen_vec_interleave_highv16qi;
11934 else
11935 unpack = gen_vec_interleave_lowv16qi;
11936 break;
11937 case V8HImode:
11938 if (high_p)
11939 unpack = gen_vec_interleave_highv8hi;
11940 else
11941 unpack = gen_vec_interleave_lowv8hi;
11942 break;
11943 case V4SImode:
11944 if (high_p)
11945 unpack = gen_vec_interleave_highv4si;
11946 else
11947 unpack = gen_vec_interleave_lowv4si;
11948 break;
11949 default:
11950 gcc_unreachable ();
11951 }
11952
11953 dest = gen_lowpart (imode, operands[0]);
11954
11955 if (unsigned_p)
11956 se = force_reg (imode, CONST0_RTX (imode));
11957 else
11958 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11959 operands[1], pc_rtx, pc_rtx);
11960
11961 emit_insn (unpack (dest, operands[1], se));
11962 }
11963
11964 /* Expand conditional increment or decrement using adc/sbb instructions.
11965 The default case using setcc followed by the conditional move can be
11966 done by generic code. */
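/* For instance, "x += (a < b)" with unsigned operands can become
   "cmp a, b; adc $0, x" - the compare sets the carry flag exactly when
   a < b, and adc folds it into the addition; the decrement case uses sbb
   in the same way.  */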
11967 int
11968 ix86_expand_int_addcc (rtx operands[])
11969 {
11970 enum rtx_code code = GET_CODE (operands[1]);
11971 rtx compare_op;
11972 rtx val = const0_rtx;
11973 bool fpcmp = false;
11974 enum machine_mode mode = GET_MODE (operands[0]);
11975
11976 if (operands[3] != const1_rtx
11977 && operands[3] != constm1_rtx)
11978 return 0;
11979 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11980 ix86_compare_op1, &compare_op))
11981 return 0;
11982 code = GET_CODE (compare_op);
11983
11984 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11985 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11986 {
11987 fpcmp = true;
11988 code = ix86_fp_compare_code_to_integer (code);
11989 }
11990
11991 if (code != LTU)
11992 {
11993 val = constm1_rtx;
11994 if (fpcmp)
11995 PUT_CODE (compare_op,
11996 reverse_condition_maybe_unordered
11997 (GET_CODE (compare_op)));
11998 else
11999 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12000 }
12001 PUT_MODE (compare_op, mode);
12002
12003 /* Construct either adc or sbb insn. */
12004 if ((code == LTU) == (operands[3] == constm1_rtx))
12005 {
12006 switch (GET_MODE (operands[0]))
12007 {
12008 case QImode:
12009 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12010 break;
12011 case HImode:
12012 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12013 break;
12014 case SImode:
12015 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12016 break;
12017 case DImode:
12018 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12019 break;
12020 default:
12021 gcc_unreachable ();
12022 }
12023 }
12024 else
12025 {
12026 switch (GET_MODE (operands[0]))
12027 {
12028 case QImode:
12029 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12030 break;
12031 case HImode:
12032 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12033 break;
12034 case SImode:
12035 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12036 break;
12037 case DImode:
12038 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12039 break;
12040 default:
12041 gcc_unreachable ();
12042 }
12043 }
12044 return 1; /* DONE */
12045 }
12046
12047
12048 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12049 works for floating point parameters and non-offsettable memories.
12050 For pushes, it returns just stack offsets; the values will be saved
12051 in the right order. At most three parts are generated. */
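/* For example, on a 32-bit target a DFmode operand is split into two SImode
   parts (low word first) and an XFmode operand into three, while on a 64-bit
   target XFmode splits into a DImode part plus an SImode upper part.  */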
12052
12053 static int
12054 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12055 {
12056 int size;
12057
12058 if (!TARGET_64BIT)
12059 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12060 else
12061 size = (GET_MODE_SIZE (mode) + 4) / 8;
12062
12063 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12064 gcc_assert (size >= 2 && size <= 3);
12065
12066 /* Optimize constant pool reference to immediates. This is used by fp
12067 moves, that force all constants to memory to allow combining. */
12068 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12069 {
12070 rtx tmp = maybe_get_pool_constant (operand);
12071 if (tmp)
12072 operand = tmp;
12073 }
12074
12075 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12076 {
12077 /* The only non-offsettable memories we handle are pushes. */
12078 int ok = push_operand (operand, VOIDmode);
12079
12080 gcc_assert (ok);
12081
12082 operand = copy_rtx (operand);
12083 PUT_MODE (operand, Pmode);
12084 parts[0] = parts[1] = parts[2] = operand;
12085 return size;
12086 }
12087
12088 if (GET_CODE (operand) == CONST_VECTOR)
12089 {
12090 enum machine_mode imode = int_mode_for_mode (mode);
12091 /* Caution: if we looked through a constant pool memory above,
12092 the operand may actually have a different mode now. That's
12093 ok, since we want to pun this all the way back to an integer. */
12094 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12095 gcc_assert (operand != NULL);
12096 mode = imode;
12097 }
12098
12099 if (!TARGET_64BIT)
12100 {
12101 if (mode == DImode)
12102 split_di (&operand, 1, &parts[0], &parts[1]);
12103 else
12104 {
12105 if (REG_P (operand))
12106 {
12107 gcc_assert (reload_completed);
12108 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12109 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12110 if (size == 3)
12111 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12112 }
12113 else if (offsettable_memref_p (operand))
12114 {
12115 operand = adjust_address (operand, SImode, 0);
12116 parts[0] = operand;
12117 parts[1] = adjust_address (operand, SImode, 4);
12118 if (size == 3)
12119 parts[2] = adjust_address (operand, SImode, 8);
12120 }
12121 else if (GET_CODE (operand) == CONST_DOUBLE)
12122 {
12123 REAL_VALUE_TYPE r;
12124 long l[4];
12125
12126 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12127 switch (mode)
12128 {
12129 case XFmode:
12130 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12131 parts[2] = gen_int_mode (l[2], SImode);
12132 break;
12133 case DFmode:
12134 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12135 break;
12136 default:
12137 gcc_unreachable ();
12138 }
12139 parts[1] = gen_int_mode (l[1], SImode);
12140 parts[0] = gen_int_mode (l[0], SImode);
12141 }
12142 else
12143 gcc_unreachable ();
12144 }
12145 }
12146 else
12147 {
12148 if (mode == TImode)
12149 split_ti (&operand, 1, &parts[0], &parts[1]);
12150 if (mode == XFmode || mode == TFmode)
12151 {
12152 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12153 if (REG_P (operand))
12154 {
12155 gcc_assert (reload_completed);
12156 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12157 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12158 }
12159 else if (offsettable_memref_p (operand))
12160 {
12161 operand = adjust_address (operand, DImode, 0);
12162 parts[0] = operand;
12163 parts[1] = adjust_address (operand, upper_mode, 8);
12164 }
12165 else if (GET_CODE (operand) == CONST_DOUBLE)
12166 {
12167 REAL_VALUE_TYPE r;
12168 long l[4];
12169
12170 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12171 real_to_target (l, &r, mode);
12172
12173 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12174 if (HOST_BITS_PER_WIDE_INT >= 64)
12175 parts[0]
12176 = gen_int_mode
12177 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12178 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12179 DImode);
12180 else
12181 parts[0] = immed_double_const (l[0], l[1], DImode);
12182
12183 if (upper_mode == SImode)
12184 parts[1] = gen_int_mode (l[2], SImode);
12185 else if (HOST_BITS_PER_WIDE_INT >= 64)
12186 parts[1]
12187 = gen_int_mode
12188 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12189 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12190 DImode);
12191 else
12192 parts[1] = immed_double_const (l[2], l[3], DImode);
12193 }
12194 else
12195 gcc_unreachable ();
12196 }
12197 }
12198
12199 return size;
12200 }
12201
12202 /* Emit insns to perform a move or push of DI, DF, and XF values.
12203 Return false when normal moves are needed; true when all required
12204 insns have been emitted. Operands 2-4 contain the input values
12205 in the correct order; operands 5-7 contain the output values. */
12206
12207 void
12208 ix86_split_long_move (rtx operands[])
12209 {
12210 rtx part[2][3];
12211 int nparts;
12212 int push = 0;
12213 int collisions = 0;
12214 enum machine_mode mode = GET_MODE (operands[0]);
12215
12216 /* The DFmode expanders may ask us to move a double.
12217 For a 64-bit target this is a single move. By hiding the fact
12218 here we simplify i386.md splitters. */
12219 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12220 {
12221 /* Optimize constant pool reference to immediates. This is used by
12222 fp moves, that force all constants to memory to allow combining. */
12223
12224 if (GET_CODE (operands[1]) == MEM
12225 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12226 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12227 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12228 if (push_operand (operands[0], VOIDmode))
12229 {
12230 operands[0] = copy_rtx (operands[0]);
12231 PUT_MODE (operands[0], Pmode);
12232 }
12233 else
12234 operands[0] = gen_lowpart (DImode, operands[0]);
12235 operands[1] = gen_lowpart (DImode, operands[1]);
12236 emit_move_insn (operands[0], operands[1]);
12237 return;
12238 }
12239
12240 /* The only non-offsettable memory we handle is push. */
12241 if (push_operand (operands[0], VOIDmode))
12242 push = 1;
12243 else
12244 gcc_assert (GET_CODE (operands[0]) != MEM
12245 || offsettable_memref_p (operands[0]));
12246
12247 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12248 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12249
12250 /* When emitting a push, take care with source operands on the stack. */
12251 if (push && GET_CODE (operands[1]) == MEM
12252 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12253 {
12254 if (nparts == 3)
12255 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12256 XEXP (part[1][2], 0));
12257 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12258 XEXP (part[1][1], 0));
12259 }
12260
12261 /* We need to do the copy in the right order in case an address register
12262 of the source overlaps the destination. */
12263 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12264 {
12265 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12266 collisions++;
12267 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12268 collisions++;
12269 if (nparts == 3
12270 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12271 collisions++;
12272
12273 /* Collision in the middle part can be handled by reordering. */
12274 if (collisions == 1 && nparts == 3
12275 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12276 {
12277 rtx tmp;
12278 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12279 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12280 }
12281
12282 /* If there are more collisions, we can't handle it by reordering.
12283 Do an lea to the last part and use only one colliding move. */
12284 else if (collisions > 1)
12285 {
12286 rtx base;
12287
12288 collisions = 1;
12289
12290 base = part[0][nparts - 1];
12291
12292 /* Handle the case when the last part isn't valid for lea.
12293 Happens in 64-bit mode storing the 12-byte XFmode. */
12294 if (GET_MODE (base) != Pmode)
12295 base = gen_rtx_REG (Pmode, REGNO (base));
12296
12297 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12298 part[1][0] = replace_equiv_address (part[1][0], base);
12299 part[1][1] = replace_equiv_address (part[1][1],
12300 plus_constant (base, UNITS_PER_WORD));
12301 if (nparts == 3)
12302 part[1][2] = replace_equiv_address (part[1][2],
12303 plus_constant (base, 8));
12304 }
12305 }
12306
12307 if (push)
12308 {
12309 if (!TARGET_64BIT)
12310 {
12311 if (nparts == 3)
12312 {
12313 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12314 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12315 emit_move_insn (part[0][2], part[1][2]);
12316 }
12317 }
12318 else
12319 {
12320 /* In 64-bit mode we don't have a 32-bit push available. If this is a
12321 register, that is OK - we will just use the larger counterpart. We also
12322 retype the memory operand - this comes from an attempt to avoid the REX
12323 prefix on moving the second half of a TFmode value. */
12324 if (GET_MODE (part[1][1]) == SImode)
12325 {
12326 switch (GET_CODE (part[1][1]))
12327 {
12328 case MEM:
12329 part[1][1] = adjust_address (part[1][1], DImode, 0);
12330 break;
12331
12332 case REG:
12333 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12334 break;
12335
12336 default:
12337 gcc_unreachable ();
12338 }
12339
12340 if (GET_MODE (part[1][0]) == SImode)
12341 part[1][0] = part[1][1];
12342 }
12343 }
12344 emit_move_insn (part[0][1], part[1][1]);
12345 emit_move_insn (part[0][0], part[1][0]);
12346 return;
12347 }
12348
12349 /* Choose correct order to not overwrite the source before it is copied. */
12350 if ((REG_P (part[0][0])
12351 && REG_P (part[1][1])
12352 && (REGNO (part[0][0]) == REGNO (part[1][1])
12353 || (nparts == 3
12354 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12355 || (collisions > 0
12356 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12357 {
12358 if (nparts == 3)
12359 {
12360 operands[2] = part[0][2];
12361 operands[3] = part[0][1];
12362 operands[4] = part[0][0];
12363 operands[5] = part[1][2];
12364 operands[6] = part[1][1];
12365 operands[7] = part[1][0];
12366 }
12367 else
12368 {
12369 operands[2] = part[0][1];
12370 operands[3] = part[0][0];
12371 operands[5] = part[1][1];
12372 operands[6] = part[1][0];
12373 }
12374 }
12375 else
12376 {
12377 if (nparts == 3)
12378 {
12379 operands[2] = part[0][0];
12380 operands[3] = part[0][1];
12381 operands[4] = part[0][2];
12382 operands[5] = part[1][0];
12383 operands[6] = part[1][1];
12384 operands[7] = part[1][2];
12385 }
12386 else
12387 {
12388 operands[2] = part[0][0];
12389 operands[3] = part[0][1];
12390 operands[5] = part[1][0];
12391 operands[6] = part[1][1];
12392 }
12393 }
12394
12395 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12396 if (optimize_size)
12397 {
12398 if (GET_CODE (operands[5]) == CONST_INT
12399 && operands[5] != const0_rtx
12400 && REG_P (operands[2]))
12401 {
12402 if (GET_CODE (operands[6]) == CONST_INT
12403 && INTVAL (operands[6]) == INTVAL (operands[5]))
12404 operands[6] = operands[2];
12405
12406 if (nparts == 3
12407 && GET_CODE (operands[7]) == CONST_INT
12408 && INTVAL (operands[7]) == INTVAL (operands[5]))
12409 operands[7] = operands[2];
12410 }
12411
12412 if (nparts == 3
12413 && GET_CODE (operands[6]) == CONST_INT
12414 && operands[6] != const0_rtx
12415 && REG_P (operands[3])
12416 && GET_CODE (operands[7]) == CONST_INT
12417 && INTVAL (operands[7]) == INTVAL (operands[6]))
12418 operands[7] = operands[3];
12419 }
12420
12421 emit_move_insn (operands[2], operands[5]);
12422 emit_move_insn (operands[3], operands[6]);
12423 if (nparts == 3)
12424 emit_move_insn (operands[4], operands[7]);
12425
12426 return;
12427 }
12428
12429 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12430 left shift by a constant, either using a single shift or
12431 a sequence of add instructions. */
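/* E.g. a left shift by 2 may be emitted as two "add reg, reg" instructions
   when that is no more expensive than a single shift by a constant
   (count * add cost <= constant-shift cost) and we are not optimizing
   for size.  */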
12432
12433 static void
12434 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12435 {
12436 if (count == 1)
12437 {
12438 emit_insn ((mode == DImode
12439 ? gen_addsi3
12440 : gen_adddi3) (operand, operand, operand));
12441 }
12442 else if (!optimize_size
12443 && count * ix86_cost->add <= ix86_cost->shift_const)
12444 {
12445 int i;
12446 for (i = 0; i < count; i++)
12447 {
12448 emit_insn ((mode == DImode
12449 ? gen_addsi3
12450 : gen_adddi3) (operand, operand, operand));
12451 }
12452 }
12453 else
12454 emit_insn ((mode == DImode
12455 ? gen_ashlsi3
12456 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12457 }
12458
12459 void
12460 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12461 {
12462 rtx low[2], high[2];
12463 int count;
12464 const int single_width = mode == DImode ? 32 : 64;
12465
12466 if (GET_CODE (operands[2]) == CONST_INT)
12467 {
12468 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12469 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12470
12471 if (count >= single_width)
12472 {
12473 emit_move_insn (high[0], low[1]);
12474 emit_move_insn (low[0], const0_rtx);
12475
12476 if (count > single_width)
12477 ix86_expand_ashl_const (high[0], count - single_width, mode);
12478 }
12479 else
12480 {
12481 if (!rtx_equal_p (operands[0], operands[1]))
12482 emit_move_insn (operands[0], operands[1]);
12483 emit_insn ((mode == DImode
12484 ? gen_x86_shld_1
12485 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12486 ix86_expand_ashl_const (low[0], count, mode);
12487 }
12488 return;
12489 }
12490
12491 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12492
12493 if (operands[1] == const1_rtx)
12494 {
12495 /* Assuming we've chosen QImode-capable registers, then 1 << N
12496 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12497 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12498 {
12499 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12500
12501 ix86_expand_clear (low[0]);
12502 ix86_expand_clear (high[0]);
12503 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12504
12505 d = gen_lowpart (QImode, low[0]);
12506 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12507 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12508 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12509
12510 d = gen_lowpart (QImode, high[0]);
12511 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12512 s = gen_rtx_NE (QImode, flags, const0_rtx);
12513 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12514 }
12515
12516 /* Otherwise, we can get the same results by manually performing
12517 a bit extract operation on bit 5/6, and then performing the two
12518 shifts. The two methods of getting 0/1 into low/high are exactly
12519 the same size. Avoiding the shift in the bit extract case helps
12520 pentium4 a bit; no one else seems to care much either way. */
12521 else
12522 {
12523 rtx x;
12524
12525 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12526 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12527 else
12528 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12529 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12530
12531 emit_insn ((mode == DImode
12532 ? gen_lshrsi3
12533 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12534 emit_insn ((mode == DImode
12535 ? gen_andsi3
12536 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12537 emit_move_insn (low[0], high[0]);
12538 emit_insn ((mode == DImode
12539 ? gen_xorsi3
12540 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12541 }
12542
12543 emit_insn ((mode == DImode
12544 ? gen_ashlsi3
12545 : gen_ashldi3) (low[0], low[0], operands[2]));
12546 emit_insn ((mode == DImode
12547 ? gen_ashlsi3
12548 : gen_ashldi3) (high[0], high[0], operands[2]));
12549 return;
12550 }
12551
12552 if (operands[1] == constm1_rtx)
12553 {
12554 /* For -1 << N, we can avoid the shld instruction, because we
12555 know that we're shifting 0...31/63 ones into a -1. */
12556 emit_move_insn (low[0], constm1_rtx);
12557 if (optimize_size)
12558 emit_move_insn (high[0], low[0]);
12559 else
12560 emit_move_insn (high[0], constm1_rtx);
12561 }
12562 else
12563 {
12564 if (!rtx_equal_p (operands[0], operands[1]))
12565 emit_move_insn (operands[0], operands[1]);
12566
12567 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12568 emit_insn ((mode == DImode
12569 ? gen_x86_shld_1
12570 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12571 }
12572
12573 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12574
12575 if (TARGET_CMOVE && scratch)
12576 {
12577 ix86_expand_clear (scratch);
12578 emit_insn ((mode == DImode
12579 ? gen_x86_shift_adj_1
12580 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12581 }
12582 else
12583 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12584 }
12585
12586 void
12587 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12588 {
12589 rtx low[2], high[2];
12590 int count;
12591 const int single_width = mode == DImode ? 32 : 64;
12592
12593 if (GET_CODE (operands[2]) == CONST_INT)
12594 {
12595 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12596 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12597
12598 if (count == single_width * 2 - 1)
12599 {
12600 emit_move_insn (high[0], high[1]);
12601 emit_insn ((mode == DImode
12602 ? gen_ashrsi3
12603 : gen_ashrdi3) (high[0], high[0],
12604 GEN_INT (single_width - 1)));
12605 emit_move_insn (low[0], high[0]);
12606
12607 }
12608 else if (count >= single_width)
12609 {
12610 emit_move_insn (low[0], high[1]);
12611 emit_move_insn (high[0], low[0]);
12612 emit_insn ((mode == DImode
12613 ? gen_ashrsi3
12614 : gen_ashrdi3) (high[0], high[0],
12615 GEN_INT (single_width - 1)));
12616 if (count > single_width)
12617 emit_insn ((mode == DImode
12618 ? gen_ashrsi3
12619 : gen_ashrdi3) (low[0], low[0],
12620 GEN_INT (count - single_width)));
12621 }
12622 else
12623 {
12624 if (!rtx_equal_p (operands[0], operands[1]))
12625 emit_move_insn (operands[0], operands[1]);
12626 emit_insn ((mode == DImode
12627 ? gen_x86_shrd_1
12628 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12629 emit_insn ((mode == DImode
12630 ? gen_ashrsi3
12631 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12632 }
12633 }
12634 else
12635 {
12636 if (!rtx_equal_p (operands[0], operands[1]))
12637 emit_move_insn (operands[0], operands[1]);
12638
12639 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12640
12641 emit_insn ((mode == DImode
12642 ? gen_x86_shrd_1
12643 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12644 emit_insn ((mode == DImode
12645 ? gen_ashrsi3
12646 : gen_ashrdi3) (high[0], high[0], operands[2]));
12647
12648 if (TARGET_CMOVE && scratch)
12649 {
12650 emit_move_insn (scratch, high[0]);
12651 emit_insn ((mode == DImode
12652 ? gen_ashrsi3
12653 : gen_ashrdi3) (scratch, scratch,
12654 GEN_INT (single_width - 1)));
12655 emit_insn ((mode == DImode
12656 ? gen_x86_shift_adj_1
12657 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12658 scratch));
12659 }
12660 else
12661 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12662 }
12663 }
12664
12665 void
12666 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12667 {
12668 rtx low[2], high[2];
12669 int count;
12670 const int single_width = mode == DImode ? 32 : 64;
12671
12672 if (GET_CODE (operands[2]) == CONST_INT)
12673 {
12674 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12675 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12676
12677 if (count >= single_width)
12678 {
12679 emit_move_insn (low[0], high[1]);
12680 ix86_expand_clear (high[0]);
12681
12682 if (count > single_width)
12683 emit_insn ((mode == DImode
12684 ? gen_lshrsi3
12685 : gen_lshrdi3) (low[0], low[0],
12686 GEN_INT (count - single_width)));
12687 }
12688 else
12689 {
12690 if (!rtx_equal_p (operands[0], operands[1]))
12691 emit_move_insn (operands[0], operands[1]);
12692 emit_insn ((mode == DImode
12693 ? gen_x86_shrd_1
12694 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12695 emit_insn ((mode == DImode
12696 ? gen_lshrsi3
12697 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12698 }
12699 }
12700 else
12701 {
12702 if (!rtx_equal_p (operands[0], operands[1]))
12703 emit_move_insn (operands[0], operands[1]);
12704
12705 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12706
12707 emit_insn ((mode == DImode
12708 ? gen_x86_shrd_1
12709 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12710 emit_insn ((mode == DImode
12711 ? gen_lshrsi3
12712 : gen_lshrdi3) (high[0], high[0], operands[2]));
12713
12714 /* Heh. By reversing the arguments, we can reuse this pattern. */
12715 if (TARGET_CMOVE && scratch)
12716 {
12717 ix86_expand_clear (scratch);
12718 emit_insn ((mode == DImode
12719 ? gen_x86_shift_adj_1
12720 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12721 scratch));
12722 }
12723 else
12724 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12725 }
12726 }
12727
12728 /* Helper function for the string operations below. Test VARIABLE for
12729 whether it is aligned to VALUE bytes. If true, jump to the label. */
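/* A typical use: label = ix86_expand_aligntest (destreg, 2) conceptually
   emits a test of that bit of DESTREG followed by a conditional jump to
   LABEL taken when the bit is clear, so the code between the call and
   emit_label (label) runs only when the bit is set.  */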
12730 static rtx
12731 ix86_expand_aligntest (rtx variable, int value)
12732 {
12733 rtx label = gen_label_rtx ();
12734 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12735 if (GET_MODE (variable) == DImode)
12736 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12737 else
12738 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12739 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12740 1, label);
12741 return label;
12742 }
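/* Editorial note (illustration only, not from the original sources): callers
   in this file wrap the fix-up code for the misaligned case between this
   helper and the returned label, roughly:

	rtx label = ix86_expand_aligntest (destreg, 1);
	... emit code handling the single byte that makes DESTREG
	    2-byte aligned ...
	emit_label (label);
	LABEL_NUSES (label) = 1;

   so the RTL emitted above amounts to "and $VALUE, tmp; je label" in the
   generated code.  */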
12743
12744 /* Decrease COUNTREG by VALUE. */
12745 static void
12746 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12747 {
12748 if (GET_MODE (countreg) == DImode)
12749 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12750 else
12751 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12752 }
12753
12754 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
12755 rtx
12756 ix86_zero_extend_to_Pmode (rtx exp)
12757 {
12758 rtx r;
12759 if (GET_MODE (exp) == VOIDmode)
12760 return force_reg (Pmode, exp);
12761 if (GET_MODE (exp) == Pmode)
12762 return copy_to_mode_reg (Pmode, exp);
12763 r = gen_reg_rtx (Pmode);
12764 emit_insn (gen_zero_extendsidi2 (r, exp));
12765 return r;
12766 }
12767
12768 /* Expand string move (memcpy) operation. Use i386 string operations when
12769 profitable. expand_clrmem contains similar code. */
12770 int
12771 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12772 {
12773 rtx srcreg, destreg, countreg, srcexp, destexp;
12774 enum machine_mode counter_mode;
12775 HOST_WIDE_INT align = 0;
12776 unsigned HOST_WIDE_INT count = 0;
12777
12778 if (GET_CODE (align_exp) == CONST_INT)
12779 align = INTVAL (align_exp);
12780
12781 /* Can't use any of this if the user has appropriated esi or edi. */
12782 if (global_regs[4] || global_regs[5])
12783 return 0;
12784
12785 /* This simple hack avoids all inlining code and simplifies code below. */
12786 if (!TARGET_ALIGN_STRINGOPS)
12787 align = 64;
12788
12789 if (GET_CODE (count_exp) == CONST_INT)
12790 {
12791 count = INTVAL (count_exp);
12792 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12793 return 0;
12794 }
12795
12796 /* Figure out the proper mode for the counter.  For 32-bit targets it is
12797 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
12798 Set count to the number of bytes copied when known at compile time. */
12799 if (!TARGET_64BIT
12800 || GET_MODE (count_exp) == SImode
12801 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12802 counter_mode = SImode;
12803 else
12804 counter_mode = DImode;
12805
12806 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12807
12808 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12809 if (destreg != XEXP (dst, 0))
12810 dst = replace_equiv_address_nv (dst, destreg);
12811 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12812 if (srcreg != XEXP (src, 0))
12813 src = replace_equiv_address_nv (src, srcreg);
12814
12815 /* When optimizing for size emit a simple rep ; movsb instruction for
12816 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12817 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12818 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12819 count / 4 + (count & 3) instructions, while the other sequence is either
12820 4 or 7 bytes, but we don't know whether the upper 24 (resp. 56) bits of
12821 %ecx are known to be zero or not.  The rep; movsb sequence causes higher
12822 register pressure though, so take that into account. */
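/* Editorial worked example (illustration only): for count == 19 at -Os,
   (count & 3) + count / 4 == 3 + 4 == 7 > 6, so the rep; movsb form below
   is used; for count == 7 the sum is 3 + 1 == 4, so control falls through
   to the discrete movsl/movsw/movsb branch instead.  */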
12823
12824 if ((!optimize || optimize_size)
12825 && (count == 0
12826 || ((count & 0x03)
12827 && (!optimize_size
12828 || count > 5 * 4
12829 || (count & 3) + count / 4 > 6))))
12830 {
12831 emit_insn (gen_cld ());
12832 countreg = ix86_zero_extend_to_Pmode (count_exp);
12833 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12834 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12835 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12836 destexp, srcexp));
12837 }
12838
12839 /* For constant aligned (or small unaligned) copies use rep movsl
12840 followed by code copying the rest.  For PentiumPro ensure 8-byte
12841 alignment to allow rep movsl acceleration. */
12842
12843 else if (count != 0
12844 && (align >= 8
12845 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12846 || optimize_size || count < (unsigned int) 64))
12847 {
12848 unsigned HOST_WIDE_INT offset = 0;
12849 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12850 rtx srcmem, dstmem;
12851
12852 emit_insn (gen_cld ());
12853 if (count & ~(size - 1))
12854 {
12855 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12856 {
12857 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12858
12859 while (offset < (count & ~(size - 1)))
12860 {
12861 srcmem = adjust_automodify_address_nv (src, movs_mode,
12862 srcreg, offset);
12863 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12864 destreg, offset);
12865 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12866 offset += size;
12867 }
12868 }
12869 else
12870 {
12871 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12872 & (TARGET_64BIT ? -1 : 0x3fffffff));
12873 countreg = copy_to_mode_reg (counter_mode, countreg);
12874 countreg = ix86_zero_extend_to_Pmode (countreg);
12875
12876 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12877 GEN_INT (size == 4 ? 2 : 3));
12878 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12879 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12880
12881 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12882 countreg, destexp, srcexp));
12883 offset = count & ~(size - 1);
12884 }
12885 }
12886 if (size == 8 && (count & 0x04))
12887 {
12888 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12889 offset);
12890 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12891 offset);
12892 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12893 offset += 4;
12894 }
12895 if (count & 0x02)
12896 {
12897 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12898 offset);
12899 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12900 offset);
12901 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12902 offset += 2;
12903 }
12904 if (count & 0x01)
12905 {
12906 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12907 offset);
12908 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12909 offset);
12910 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12911 }
12912 }
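/* Editorial worked example (illustration only): for count == 11 with
   size == 4, the branch above first copies the 8-byte aligned part (either
   as two inline movsl's or as rep; movsl with %ecx == 2), then the
   count & 2 test copies one HImode word and the count & 1 test copies the
   final byte, for 8 + 2 + 1 == 11 bytes in total.  */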
12913 /* The generic code based on the glibc implementation:
12914 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12915 allowing accelerated copying there)
12916 - copy the data using rep movsl
12917 - copy the rest. */
12918 else
12919 {
12920 rtx countreg2;
12921 rtx label = NULL;
12922 rtx srcmem, dstmem;
12923 int desired_alignment = (TARGET_PENTIUMPRO
12924 && (count == 0 || count >= (unsigned int) 260)
12925 ? 8 : UNITS_PER_WORD);
12926 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12927 dst = change_address (dst, BLKmode, destreg);
12928 src = change_address (src, BLKmode, srcreg);
12929
12930 /* In case we don't know anything about the alignment, default to the
12931 library version, since it is usually equally fast and results in
12932 shorter code.
12933
12934 Also emit a call when we know that the count is large and the call
12935 overhead will not be important. */
12936 if (!TARGET_INLINE_ALL_STRINGOPS
12937 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12938 return 0;
12939
12940 if (TARGET_SINGLE_STRINGOP)
12941 emit_insn (gen_cld ());
12942
12943 countreg2 = gen_reg_rtx (Pmode);
12944 countreg = copy_to_mode_reg (counter_mode, count_exp);
12945
12946 /* We don't use loops to align destination and to copy parts smaller
12947 than 4 bytes, because gcc is able to optimize such code better (in
12948 the case the destination or the count really is aligned, gcc is often
12949 able to predict the branches) and also it is friendlier to the
12950 hardware branch prediction.
12951
12952 Using loops is beneficial for the generic case, because we can
12953 handle small counts using the loops. Many CPUs (such as Athlon)
12954 have large REP prefix setup costs.
12955
12956 This is quite costly. Maybe we can revisit this decision later or
12957 add some customizability to this code. */
12958
12959 if (count == 0 && align < desired_alignment)
12960 {
12961 label = gen_label_rtx ();
12962 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12963 LEU, 0, counter_mode, 1, label);
12964 }
12965 if (align <= 1)
12966 {
12967 rtx label = ix86_expand_aligntest (destreg, 1);
12968 srcmem = change_address (src, QImode, srcreg);
12969 dstmem = change_address (dst, QImode, destreg);
12970 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12971 ix86_adjust_counter (countreg, 1);
12972 emit_label (label);
12973 LABEL_NUSES (label) = 1;
12974 }
12975 if (align <= 2)
12976 {
12977 rtx label = ix86_expand_aligntest (destreg, 2);
12978 srcmem = change_address (src, HImode, srcreg);
12979 dstmem = change_address (dst, HImode, destreg);
12980 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12981 ix86_adjust_counter (countreg, 2);
12982 emit_label (label);
12983 LABEL_NUSES (label) = 1;
12984 }
12985 if (align <= 4 && desired_alignment > 4)
12986 {
12987 rtx label = ix86_expand_aligntest (destreg, 4);
12988 srcmem = change_address (src, SImode, srcreg);
12989 dstmem = change_address (dst, SImode, destreg);
12990 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12991 ix86_adjust_counter (countreg, 4);
12992 emit_label (label);
12993 LABEL_NUSES (label) = 1;
12994 }
12995
12996 if (label && desired_alignment > 4 && !TARGET_64BIT)
12997 {
12998 emit_label (label);
12999 LABEL_NUSES (label) = 1;
13000 label = NULL_RTX;
13001 }
13002 if (!TARGET_SINGLE_STRINGOP)
13003 emit_insn (gen_cld ());
13004 if (TARGET_64BIT)
13005 {
13006 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13007 GEN_INT (3)));
13008 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13009 }
13010 else
13011 {
13012 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13013 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13014 }
13015 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13016 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13017 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13018 countreg2, destexp, srcexp));
13019
13020 if (label)
13021 {
13022 emit_label (label);
13023 LABEL_NUSES (label) = 1;
13024 }
13025 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13026 {
13027 srcmem = change_address (src, SImode, srcreg);
13028 dstmem = change_address (dst, SImode, destreg);
13029 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13030 }
13031 if ((align <= 4 || count == 0) && TARGET_64BIT)
13032 {
13033 rtx label = ix86_expand_aligntest (countreg, 4);
13034 srcmem = change_address (src, SImode, srcreg);
13035 dstmem = change_address (dst, SImode, destreg);
13036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13037 emit_label (label);
13038 LABEL_NUSES (label) = 1;
13039 }
13040 if (align > 2 && count != 0 && (count & 2))
13041 {
13042 srcmem = change_address (src, HImode, srcreg);
13043 dstmem = change_address (dst, HImode, destreg);
13044 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13045 }
13046 if (align <= 2 || count == 0)
13047 {
13048 rtx label = ix86_expand_aligntest (countreg, 2);
13049 srcmem = change_address (src, HImode, srcreg);
13050 dstmem = change_address (dst, HImode, destreg);
13051 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13052 emit_label (label);
13053 LABEL_NUSES (label) = 1;
13054 }
13055 if (align > 1 && count != 0 && (count & 1))
13056 {
13057 srcmem = change_address (src, QImode, srcreg);
13058 dstmem = change_address (dst, QImode, destreg);
13059 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13060 }
13061 if (align <= 1 || count == 0)
13062 {
13063 rtx label = ix86_expand_aligntest (countreg, 1);
13064 srcmem = change_address (src, QImode, srcreg);
13065 dstmem = change_address (dst, QImode, destreg);
13066 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13067 emit_label (label);
13068 LABEL_NUSES (label) = 1;
13069 }
13070 }
13071
13072 return 1;
13073 }
13074
13075 /* Expand string clear operation (bzero). Use i386 string operations when
13076 profitable. expand_movmem contains similar code. */
13077 int
13078 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13079 {
13080 rtx destreg, zeroreg, countreg, destexp;
13081 enum machine_mode counter_mode;
13082 HOST_WIDE_INT align = 0;
13083 unsigned HOST_WIDE_INT count = 0;
13084
13085 if (GET_CODE (align_exp) == CONST_INT)
13086 align = INTVAL (align_exp);
13087
13088 /* Can't use any of this if the user has appropriated esi. */
13089 if (global_regs[4])
13090 return 0;
13091
13092 /* This simple hack avoids all inlining code and simplifies code below. */
13093 if (!TARGET_ALIGN_STRINGOPS)
13094 align = 32;
13095
13096 if (GET_CODE (count_exp) == CONST_INT)
13097 {
13098 count = INTVAL (count_exp);
13099 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13100 return 0;
13101 }
13102 /* Figure out the proper mode for the counter.  For 32-bit targets it is
13103 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
13104 Set count to the number of bytes cleared when known at compile time. */
13105 if (!TARGET_64BIT
13106 || GET_MODE (count_exp) == SImode
13107 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13108 counter_mode = SImode;
13109 else
13110 counter_mode = DImode;
13111
13112 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13113 if (destreg != XEXP (dst, 0))
13114 dst = replace_equiv_address_nv (dst, destreg);
13115
13116
13117 /* When optimizing for size emit a simple rep ; stosb instruction for
13118 counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13119 sequence is 7 bytes long, so if optimizing for size and the count is
13120 small enough that some stosl, stosw and stosb instructions without
13121 rep are shorter, fall through to the next branch. */
13122
13123 if ((!optimize || optimize_size)
13124 && (count == 0
13125 || ((count & 0x03)
13126 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13127 {
13128 emit_insn (gen_cld ());
13129
13130 countreg = ix86_zero_extend_to_Pmode (count_exp);
13131 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13132 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13133 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13134 }
13135 else if (count != 0
13136 && (align >= 8
13137 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13138 || optimize_size || count < (unsigned int) 64))
13139 {
13140 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13141 unsigned HOST_WIDE_INT offset = 0;
13142
13143 emit_insn (gen_cld ());
13144
13145 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13146 if (count & ~(size - 1))
13147 {
13148 unsigned HOST_WIDE_INT repcount;
13149 unsigned int max_nonrep;
13150
13151 repcount = count >> (size == 4 ? 2 : 3);
13152 if (!TARGET_64BIT)
13153 repcount &= 0x3fffffff;
13154
13155 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13156 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13157 bytes. In both cases the latter seems to be faster for small
13158 values of N. */
13159 max_nonrep = size == 4 ? 7 : 4;
13160 if (!optimize_size)
13161 switch (ix86_tune)
13162 {
13163 case PROCESSOR_PENTIUM4:
13164 case PROCESSOR_NOCONA:
13165 max_nonrep = 3;
13166 break;
13167 default:
13168 break;
13169 }
13170
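/* Editorial worked example (illustration only): with size == 4 and
   count == 24, repcount == 6; with the default max_nonrep of 7 this emits
   six discrete stosl stores, while on Pentium 4 / Nocona (max_nonrep == 3)
   a single movl $6, %ecx; rep; stosl is emitted instead.  */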
13171 if (repcount <= max_nonrep)
13172 while (repcount-- > 0)
13173 {
13174 rtx mem = adjust_automodify_address_nv (dst,
13175 GET_MODE (zeroreg),
13176 destreg, offset);
13177 emit_insn (gen_strset (destreg, mem, zeroreg));
13178 offset += size;
13179 }
13180 else
13181 {
13182 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13183 countreg = ix86_zero_extend_to_Pmode (countreg);
13184 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13185 GEN_INT (size == 4 ? 2 : 3));
13186 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13187 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13188 destexp));
13189 offset = count & ~(size - 1);
13190 }
13191 }
13192 if (size == 8 && (count & 0x04))
13193 {
13194 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13195 offset);
13196 emit_insn (gen_strset (destreg, mem,
13197 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13198 offset += 4;
13199 }
13200 if (count & 0x02)
13201 {
13202 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13203 offset);
13204 emit_insn (gen_strset (destreg, mem,
13205 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13206 offset += 2;
13207 }
13208 if (count & 0x01)
13209 {
13210 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13211 offset);
13212 emit_insn (gen_strset (destreg, mem,
13213 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13214 }
13215 }
13216 else
13217 {
13218 rtx countreg2;
13219 rtx label = NULL;
13220 /* Compute desired alignment of the string operation. */
13221 int desired_alignment = (TARGET_PENTIUMPRO
13222 && (count == 0 || count >= (unsigned int) 260)
13223 ? 8 : UNITS_PER_WORD);
13224
13225 /* In case we don't know anything about the alignment, default to the
13226 library version, since it is usually equally fast and results in
13227 shorter code.
13228
13229 Also emit a call when we know that the count is large and the call
13230 overhead will not be important. */
13231 if (!TARGET_INLINE_ALL_STRINGOPS
13232 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13233 return 0;
13234
13235 if (TARGET_SINGLE_STRINGOP)
13236 emit_insn (gen_cld ());
13237
13238 countreg2 = gen_reg_rtx (Pmode);
13239 countreg = copy_to_mode_reg (counter_mode, count_exp);
13240 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13241 /* Get rid of MEM_OFFSET, it won't be accurate. */
13242 dst = change_address (dst, BLKmode, destreg);
13243
13244 if (count == 0 && align < desired_alignment)
13245 {
13246 label = gen_label_rtx ();
13247 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13248 LEU, 0, counter_mode, 1, label);
13249 }
13250 if (align <= 1)
13251 {
13252 rtx label = ix86_expand_aligntest (destreg, 1);
13253 emit_insn (gen_strset (destreg, dst,
13254 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13255 ix86_adjust_counter (countreg, 1);
13256 emit_label (label);
13257 LABEL_NUSES (label) = 1;
13258 }
13259 if (align <= 2)
13260 {
13261 rtx label = ix86_expand_aligntest (destreg, 2);
13262 emit_insn (gen_strset (destreg, dst,
13263 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13264 ix86_adjust_counter (countreg, 2);
13265 emit_label (label);
13266 LABEL_NUSES (label) = 1;
13267 }
13268 if (align <= 4 && desired_alignment > 4)
13269 {
13270 rtx label = ix86_expand_aligntest (destreg, 4);
13271 emit_insn (gen_strset (destreg, dst,
13272 (TARGET_64BIT
13273 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13274 : zeroreg)));
13275 ix86_adjust_counter (countreg, 4);
13276 emit_label (label);
13277 LABEL_NUSES (label) = 1;
13278 }
13279
13280 if (label && desired_alignment > 4 && !TARGET_64BIT)
13281 {
13282 emit_label (label);
13283 LABEL_NUSES (label) = 1;
13284 label = NULL_RTX;
13285 }
13286
13287 if (!TARGET_SINGLE_STRINGOP)
13288 emit_insn (gen_cld ());
13289 if (TARGET_64BIT)
13290 {
13291 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13292 GEN_INT (3)));
13293 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13294 }
13295 else
13296 {
13297 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13298 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13299 }
13300 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13301 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13302
13303 if (label)
13304 {
13305 emit_label (label);
13306 LABEL_NUSES (label) = 1;
13307 }
13308
13309 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13310 emit_insn (gen_strset (destreg, dst,
13311 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13312 if (TARGET_64BIT && (align <= 4 || count == 0))
13313 {
13314 rtx label = ix86_expand_aligntest (countreg, 4);
13315 emit_insn (gen_strset (destreg, dst,
13316 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13317 emit_label (label);
13318 LABEL_NUSES (label) = 1;
13319 }
13320 if (align > 2 && count != 0 && (count & 2))
13321 emit_insn (gen_strset (destreg, dst,
13322 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13323 if (align <= 2 || count == 0)
13324 {
13325 rtx label = ix86_expand_aligntest (countreg, 2);
13326 emit_insn (gen_strset (destreg, dst,
13327 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13328 emit_label (label);
13329 LABEL_NUSES (label) = 1;
13330 }
13331 if (align > 1 && count != 0 && (count & 1))
13332 emit_insn (gen_strset (destreg, dst,
13333 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13334 if (align <= 1 || count == 0)
13335 {
13336 rtx label = ix86_expand_aligntest (countreg, 1);
13337 emit_insn (gen_strset (destreg, dst,
13338 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13339 emit_label (label);
13340 LABEL_NUSES (label) = 1;
13341 }
13342 }
13343 return 1;
13344 }
13345
13346 /* Expand strlen. */
13347 int
13348 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13349 {
13350 rtx addr, scratch1, scratch2, scratch3, scratch4;
13351
13352 /* The generic case of the strlen expander is long.  Avoid expanding it
13353 unless TARGET_INLINE_ALL_STRINGOPS. */
13354
13355 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13356 && !TARGET_INLINE_ALL_STRINGOPS
13357 && !optimize_size
13358 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13359 return 0;
13360
13361 addr = force_reg (Pmode, XEXP (src, 0));
13362 scratch1 = gen_reg_rtx (Pmode);
13363
13364 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13365 && !optimize_size)
13366 {
13367 /* Well, it seems that some optimizer does not combine a call like
13368 foo(strlen(bar), strlen(bar));
13369 when the move and the subtraction are done here.  It does calculate
13370 the length just once when these instructions are done inside
13371 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
13372 often used and I use one fewer register for the lifetime of
13373 output_strlen_unroll() this is better. */
13374
13375 emit_move_insn (out, addr);
13376
13377 ix86_expand_strlensi_unroll_1 (out, src, align);
13378
13379 /* strlensi_unroll_1 returns the address of the zero at the end of
13380 the string, like memchr(), so compute the length by subtracting
13381 the start address. */
13382 if (TARGET_64BIT)
13383 emit_insn (gen_subdi3 (out, out, addr));
13384 else
13385 emit_insn (gen_subsi3 (out, out, addr));
13386 }
13387 else
13388 {
13389 rtx unspec;
13390 scratch2 = gen_reg_rtx (Pmode);
13391 scratch3 = gen_reg_rtx (Pmode);
13392 scratch4 = force_reg (Pmode, constm1_rtx);
13393
13394 emit_move_insn (scratch3, addr);
13395 eoschar = force_reg (QImode, eoschar);
13396
13397 emit_insn (gen_cld ());
13398 src = replace_equiv_address_nv (src, scratch3);
13399
13400 /* If .md starts supporting :P, this can be done in .md. */
13401 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13402 scratch4), UNSPEC_SCAS);
13403 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
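/* Editorial note (illustration only, not from the original sources): the
   repnz; scasb expansion starts with %ecx == -1 and decrements it once per
   byte scanned, including the terminating zero.  For a 5-byte string the
   final counter is -1 - 6 == -7, so the one's complement below gives 6 and
   adding -1 yields the length 5.  */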
13404 if (TARGET_64BIT)
13405 {
13406 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13407 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13408 }
13409 else
13410 {
13411 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13412 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13413 }
13414 }
13415 return 1;
13416 }
13417
13418 /* Expand the appropriate insns for doing strlen if not just doing
13419 repnz; scasb
13420
13421 out = result, initialized with the start address
13422 align_rtx = alignment of the address.
13423 scratch = scratch register, initialized with the start address when
13424 not aligned, otherwise undefined
13425
13426 This is just the body. It needs the initializations mentioned above and
13427 some address computing at the end. These things are done in i386.md. */
13428
13429 static void
13430 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13431 {
13432 int align;
13433 rtx tmp;
13434 rtx align_2_label = NULL_RTX;
13435 rtx align_3_label = NULL_RTX;
13436 rtx align_4_label = gen_label_rtx ();
13437 rtx end_0_label = gen_label_rtx ();
13438 rtx mem;
13439 rtx tmpreg = gen_reg_rtx (SImode);
13440 rtx scratch = gen_reg_rtx (SImode);
13441 rtx cmp;
13442
13443 align = 0;
13444 if (GET_CODE (align_rtx) == CONST_INT)
13445 align = INTVAL (align_rtx);
13446
13447 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13448
13449 /* Is there a known alignment and is it less than 4? */
13450 if (align < 4)
13451 {
13452 rtx scratch1 = gen_reg_rtx (Pmode);
13453 emit_move_insn (scratch1, out);
13454 /* Is there a known alignment and is it not 2? */
13455 if (align != 2)
13456 {
13457 align_3_label = gen_label_rtx (); /* Label when (addr & 3) == 3 */
13458 align_2_label = gen_label_rtx (); /* Label when (addr & 3) == 2 */
13459
13460 /* Leave just the 3 lower bits. */
13461 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13462 NULL_RTX, 0, OPTAB_WIDEN);
13463
13464 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13465 Pmode, 1, align_4_label);
13466 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13467 Pmode, 1, align_2_label);
13468 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13469 Pmode, 1, align_3_label);
13470 }
13471 else
13472 {
13473 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13474 check whether it is aligned to a 4-byte boundary. */
13475
13476 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13477 NULL_RTX, 0, OPTAB_WIDEN);
13478
13479 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13480 Pmode, 1, align_4_label);
13481 }
13482
13483 mem = change_address (src, QImode, out);
13484
13485 /* Now compare the bytes. */
13486
13487 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
13488 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13489 QImode, 1, end_0_label);
13490
13491 /* Increment the address. */
13492 if (TARGET_64BIT)
13493 emit_insn (gen_adddi3 (out, out, const1_rtx));
13494 else
13495 emit_insn (gen_addsi3 (out, out, const1_rtx));
13496
13497 /* Not needed with an alignment of 2 */
13498 if (align != 2)
13499 {
13500 emit_label (align_2_label);
13501
13502 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13503 end_0_label);
13504
13505 if (TARGET_64BIT)
13506 emit_insn (gen_adddi3 (out, out, const1_rtx));
13507 else
13508 emit_insn (gen_addsi3 (out, out, const1_rtx));
13509
13510 emit_label (align_3_label);
13511 }
13512
13513 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13514 end_0_label);
13515
13516 if (TARGET_64BIT)
13517 emit_insn (gen_adddi3 (out, out, const1_rtx));
13518 else
13519 emit_insn (gen_addsi3 (out, out, const1_rtx));
13520 }
13521
13522 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13523 align this loop; that only makes the program larger and does not
13524 speed it up. */
13525 emit_label (align_4_label);
13526
13527 mem = change_address (src, SImode, out);
13528 emit_move_insn (scratch, mem);
13529 if (TARGET_64BIT)
13530 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13531 else
13532 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13533
13534 /* This formula yields a nonzero result iff one of the bytes is zero.
13535 This saves three branches inside the loop and many cycles. */
13536
13537 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13538 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13539 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13540 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13541 gen_int_mode (0x80808080, SImode)));
13542 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13543 align_4_label);
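/* Editorial worked example (illustration only): for scratch == 0x40410041
   (the second byte of the word is zero) the formula gives
	(0x40410041 - 0x01010101) & ~0x40410041 & 0x80808080 == 0x00008000,
   so the loop exits; a word without a zero byte always yields 0 here and
   the branch back to align_4_label is taken.  */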
13544
13545 if (TARGET_CMOVE)
13546 {
13547 rtx reg = gen_reg_rtx (SImode);
13548 rtx reg2 = gen_reg_rtx (Pmode);
13549 emit_move_insn (reg, tmpreg);
13550 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13551
13552 /* If zero is not in the first two bytes, move two bytes forward. */
13553 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13554 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13555 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13556 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13557 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13558 reg,
13559 tmpreg)));
13560 /* Emit lea manually to avoid clobbering of flags. */
13561 emit_insn (gen_rtx_SET (SImode, reg2,
13562 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13563
13564 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13565 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13566 emit_insn (gen_rtx_SET (VOIDmode, out,
13567 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13568 reg2,
13569 out)));
13570
13571 }
13572 else
13573 {
13574 rtx end_2_label = gen_label_rtx ();
13575 /* Is zero in the first two bytes? */
13576
13577 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13578 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13579 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13580 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13581 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13582 pc_rtx);
13583 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13584 JUMP_LABEL (tmp) = end_2_label;
13585
13586 /* Not in the first two. Move two bytes forward. */
13587 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13588 if (TARGET_64BIT)
13589 emit_insn (gen_adddi3 (out, out, const2_rtx));
13590 else
13591 emit_insn (gen_addsi3 (out, out, const2_rtx));
13592
13593 emit_label (end_2_label);
13594
13595 }
13596
13597 /* Avoid a branch in fixing up the byte. */
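/* Editorial note (illustration only, not from the original sources): OUT is
   now 4 (or, after the two-byte step above, 6) bytes past the start of the
   word containing the zero, and bit 7 of the low byte of TMPREG is set iff
   the lower byte of the remaining pair is the zero.  Doubling that byte
   below moves bit 7 into the carry flag, and the borrowing subtraction
   computes OUT - 3 - carry, i.e. the address of the zero byte itself.  */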
13598 tmpreg = gen_lowpart (QImode, tmpreg);
13599 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13600 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13601 if (TARGET_64BIT)
13602 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13603 else
13604 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13605
13606 emit_label (end_0_label);
13607 }
13608
13609 void
13610 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13611 rtx callarg2 ATTRIBUTE_UNUSED,
13612 rtx pop, int sibcall)
13613 {
13614 rtx use = NULL, call;
13615
13616 if (pop == const0_rtx)
13617 pop = NULL;
13618 gcc_assert (!TARGET_64BIT || !pop);
13619
13620 if (TARGET_MACHO && !TARGET_64BIT)
13621 {
13622 #if TARGET_MACHO
13623 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13624 fnaddr = machopic_indirect_call_target (fnaddr);
13625 #endif
13626 }
13627 else
13628 {
13629 /* Static functions and indirect calls don't need the pic register. */
13630 if (! TARGET_64BIT && flag_pic
13631 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13632 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13633 use_reg (&use, pic_offset_table_rtx);
13634 }
13635
13636 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13637 {
13638 rtx al = gen_rtx_REG (QImode, 0);
13639 emit_move_insn (al, callarg2);
13640 use_reg (&use, al);
13641 }
13642
13643 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13644 {
13645 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13646 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13647 }
13648 if (sibcall && TARGET_64BIT
13649 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13650 {
13651 rtx addr;
13652 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13653 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13654 emit_move_insn (fnaddr, addr);
13655 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13656 }
13657
13658 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13659 if (retval)
13660 call = gen_rtx_SET (VOIDmode, retval, call);
13661 if (pop)
13662 {
13663 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13664 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13665 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13666 }
13667
13668 call = emit_call_insn (call);
13669 if (use)
13670 CALL_INSN_FUNCTION_USAGE (call) = use;
13671 }
13672
13673 \f
13674 /* Clear stack slot assignments remembered from previous functions.
13675 This is called from INIT_EXPANDERS once before RTL is emitted for each
13676 function. */
13677
13678 static struct machine_function *
13679 ix86_init_machine_status (void)
13680 {
13681 struct machine_function *f;
13682
13683 f = ggc_alloc_cleared (sizeof (struct machine_function));
13684 f->use_fast_prologue_epilogue_nregs = -1;
13685 f->tls_descriptor_call_expanded_p = 0;
13686
13687 return f;
13688 }
13689
13690 /* Return a MEM corresponding to a stack slot with mode MODE.
13691 Allocate a new slot if necessary.
13692
13693 The RTL for a function can have several slots available: N is
13694 which slot to use. */
13695
13696 rtx
13697 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13698 {
13699 struct stack_local_entry *s;
13700
13701 gcc_assert (n < MAX_386_STACK_LOCALS);
13702
13703 for (s = ix86_stack_locals; s; s = s->next)
13704 if (s->mode == mode && s->n == n)
13705 return copy_rtx (s->rtl);
13706
13707 s = (struct stack_local_entry *)
13708 ggc_alloc (sizeof (struct stack_local_entry));
13709 s->n = n;
13710 s->mode = mode;
13711 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13712
13713 s->next = ix86_stack_locals;
13714 ix86_stack_locals = s;
13715 return s->rtl;
13716 }
13717
13718 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13719
13720 static GTY(()) rtx ix86_tls_symbol;
13721 rtx
13722 ix86_tls_get_addr (void)
13723 {
13724
13725 if (!ix86_tls_symbol)
13726 {
13727 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13728 (TARGET_ANY_GNU_TLS
13729 && !TARGET_64BIT)
13730 ? "___tls_get_addr"
13731 : "__tls_get_addr");
13732 }
13733
13734 return ix86_tls_symbol;
13735 }
13736
13737 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13738
13739 static GTY(()) rtx ix86_tls_module_base_symbol;
13740 rtx
13741 ix86_tls_module_base (void)
13742 {
13743
13744 if (!ix86_tls_module_base_symbol)
13745 {
13746 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13747 "_TLS_MODULE_BASE_");
13748 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13749 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13750 }
13751
13752 return ix86_tls_module_base_symbol;
13753 }
13754 \f
13755 /* Calculate the length of the memory address in the instruction
13756 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13757
13758 int
13759 memory_address_length (rtx addr)
13760 {
13761 struct ix86_address parts;
13762 rtx base, index, disp;
13763 int len;
13764 int ok;
13765
13766 if (GET_CODE (addr) == PRE_DEC
13767 || GET_CODE (addr) == POST_INC
13768 || GET_CODE (addr) == PRE_MODIFY
13769 || GET_CODE (addr) == POST_MODIFY)
13770 return 0;
13771
13772 ok = ix86_decompose_address (addr, &parts);
13773 gcc_assert (ok);
13774
13775 if (parts.base && GET_CODE (parts.base) == SUBREG)
13776 parts.base = SUBREG_REG (parts.base);
13777 if (parts.index && GET_CODE (parts.index) == SUBREG)
13778 parts.index = SUBREG_REG (parts.index);
13779
13780 base = parts.base;
13781 index = parts.index;
13782 disp = parts.disp;
13783 len = 0;
13784
13785 /* Rule of thumb:
13786 - esp as the base always wants an index,
13787 - ebp as the base always wants a displacement. */
13788
13789 /* Register Indirect. */
13790 if (base && !index && !disp)
13791 {
13792 /* esp (for its index) and ebp (for its displacement) need
13793 the two-byte modrm form. */
13794 if (addr == stack_pointer_rtx
13795 || addr == arg_pointer_rtx
13796 || addr == frame_pointer_rtx
13797 || addr == hard_frame_pointer_rtx)
13798 len = 1;
13799 }
13800
13801 /* Direct Addressing. */
13802 else if (disp && !base && !index)
13803 len = 4;
13804
13805 else
13806 {
13807 /* Find the length of the displacement constant. */
13808 if (disp)
13809 {
13810 if (base && satisfies_constraint_K (disp))
13811 len = 1;
13812 else
13813 len = 4;
13814 }
13815 /* ebp always wants a displacement. */
13816 else if (base == hard_frame_pointer_rtx)
13817 len = 1;
13818
13819 /* An index requires the two-byte modrm form.... */
13820 if (index
13821 /* ...like esp, which always wants an index. */
13822 || base == stack_pointer_rtx
13823 || base == arg_pointer_rtx
13824 || base == frame_pointer_rtx)
13825 len += 1;
13826 }
13827
13828 return len;
13829 }
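/* Editorial examples (illustration only), for 32-bit addresses: (%eax)
   adds 0 bytes; (%esp) or (%ebp) adds 1; 4(%eax) adds 1 (disp8);
   4096(%eax) or a bare constant address adds 4 (disp32);
   4(%eax,%ebx,2) adds 1 + 1 == 2 for the disp8 and the SIB byte.  */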
13830
13831 /* Compute the default value for the "length_immediate" attribute.  When
13832 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
13833 int
13834 ix86_attr_length_immediate_default (rtx insn, int shortform)
13835 {
13836 int len = 0;
13837 int i;
13838 extract_insn_cached (insn);
13839 for (i = recog_data.n_operands - 1; i >= 0; --i)
13840 if (CONSTANT_P (recog_data.operand[i]))
13841 {
13842 gcc_assert (!len);
13843 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13844 len = 1;
13845 else
13846 {
13847 switch (get_attr_mode (insn))
13848 {
13849 case MODE_QI:
13850 len+=1;
13851 break;
13852 case MODE_HI:
13853 len+=2;
13854 break;
13855 case MODE_SI:
13856 len+=4;
13857 break;
13858 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13859 case MODE_DI:
13860 len+=4;
13861 break;
13862 default:
13863 fatal_insn ("unknown insn mode", insn);
13864 }
13865 }
13866 }
13867 return len;
13868 }
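/* Editorial examples (illustration only): "addl $3, %eax" with SHORTFORM
   set satisfies constraint K and counts 1 byte; "addl $300, %eax" counts 4;
   a DImode "addq $300, %rax" also counts 4, since its immediate is the
   32-bit sign-extended form.  */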
13869 /* Compute default value for "length_address" attribute. */
13870 int
13871 ix86_attr_length_address_default (rtx insn)
13872 {
13873 int i;
13874
13875 if (get_attr_type (insn) == TYPE_LEA)
13876 {
13877 rtx set = PATTERN (insn);
13878
13879 if (GET_CODE (set) == PARALLEL)
13880 set = XVECEXP (set, 0, 0);
13881
13882 gcc_assert (GET_CODE (set) == SET);
13883
13884 return memory_address_length (SET_SRC (set));
13885 }
13886
13887 extract_insn_cached (insn);
13888 for (i = recog_data.n_operands - 1; i >= 0; --i)
13889 if (GET_CODE (recog_data.operand[i]) == MEM)
13890 {
13891 return memory_address_length (XEXP (recog_data.operand[i], 0));
13893 }
13894 return 0;
13895 }
13896 \f
13897 /* Return the maximum number of instructions a cpu can issue. */
13898
13899 static int
13900 ix86_issue_rate (void)
13901 {
13902 switch (ix86_tune)
13903 {
13904 case PROCESSOR_PENTIUM:
13905 case PROCESSOR_K6:
13906 return 2;
13907
13908 case PROCESSOR_PENTIUMPRO:
13909 case PROCESSOR_PENTIUM4:
13910 case PROCESSOR_ATHLON:
13911 case PROCESSOR_K8:
13912 case PROCESSOR_NOCONA:
13913 case PROCESSOR_GENERIC32:
13914 case PROCESSOR_GENERIC64:
13915 return 3;
13916
13917 case PROCESSOR_CORE2:
13918 return 4;
13919
13920 default:
13921 return 1;
13922 }
13923 }
13924
13925 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13926 set by DEP_INSN and nothing else that DEP_INSN sets. */
13927
13928 static int
13929 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13930 {
13931 rtx set, set2;
13932
13933 /* Simplify the test for uninteresting insns. */
13934 if (insn_type != TYPE_SETCC
13935 && insn_type != TYPE_ICMOV
13936 && insn_type != TYPE_FCMOV
13937 && insn_type != TYPE_IBR)
13938 return 0;
13939
13940 if ((set = single_set (dep_insn)) != 0)
13941 {
13942 set = SET_DEST (set);
13943 set2 = NULL_RTX;
13944 }
13945 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13946 && XVECLEN (PATTERN (dep_insn), 0) == 2
13947 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13948 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13949 {
13950 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13951 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13952 }
13953 else
13954 return 0;
13955
13956 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13957 return 0;
13958
13959 /* This test is true if the dependent insn reads the flags but
13960 not any other potentially set register. */
13961 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13962 return 0;
13963
13964 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13965 return 0;
13966
13967 return 1;
13968 }
13969
13970 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13971 address with operands set by DEP_INSN. */
13972
13973 static int
13974 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13975 {
13976 rtx addr;
13977
13978 if (insn_type == TYPE_LEA
13979 && TARGET_PENTIUM)
13980 {
13981 addr = PATTERN (insn);
13982
13983 if (GET_CODE (addr) == PARALLEL)
13984 addr = XVECEXP (addr, 0, 0);
13985
13986 gcc_assert (GET_CODE (addr) == SET);
13987
13988 addr = SET_SRC (addr);
13989 }
13990 else
13991 {
13992 int i;
13993 extract_insn_cached (insn);
13994 for (i = recog_data.n_operands - 1; i >= 0; --i)
13995 if (GET_CODE (recog_data.operand[i]) == MEM)
13996 {
13997 addr = XEXP (recog_data.operand[i], 0);
13998 goto found;
13999 }
14000 return 0;
14001 found:;
14002 }
14003
14004 return modified_in_p (addr, dep_insn);
14005 }
14006
14007 static int
14008 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14009 {
14010 enum attr_type insn_type, dep_insn_type;
14011 enum attr_memory memory;
14012 rtx set, set2;
14013 int dep_insn_code_number;
14014
14015 /* Anti and output dependencies have zero cost on all CPUs. */
14016 if (REG_NOTE_KIND (link) != 0)
14017 return 0;
14018
14019 dep_insn_code_number = recog_memoized (dep_insn);
14020
14021 /* If we can't recognize the insns, we can't really do anything. */
14022 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14023 return cost;
14024
14025 insn_type = get_attr_type (insn);
14026 dep_insn_type = get_attr_type (dep_insn);
14027
14028 switch (ix86_tune)
14029 {
14030 case PROCESSOR_PENTIUM:
14031 /* Address Generation Interlock adds a cycle of latency. */
14032 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14033 cost += 1;
14034
14035 /* ??? Compares pair with jump/setcc. */
14036 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14037 cost = 0;
14038
14039 /* Floating point stores require value to be ready one cycle earlier. */
14040 if (insn_type == TYPE_FMOV
14041 && get_attr_memory (insn) == MEMORY_STORE
14042 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14043 cost += 1;
14044 break;
14045
14046 case PROCESSOR_PENTIUMPRO:
14047 memory = get_attr_memory (insn);
14048
14049 /* INT->FP conversion is expensive. */
14050 if (get_attr_fp_int_src (dep_insn))
14051 cost += 5;
14052
14053 /* There is one cycle extra latency between an FP op and a store. */
14054 if (insn_type == TYPE_FMOV
14055 && (set = single_set (dep_insn)) != NULL_RTX
14056 && (set2 = single_set (insn)) != NULL_RTX
14057 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14058 && GET_CODE (SET_DEST (set2)) == MEM)
14059 cost += 1;
14060
14061 /* Show the reorder buffer's ability to hide the latency of a load by
14062 executing it in parallel with the previous instruction, when the
14063 previous instruction is not needed to compute the address. */
14064 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14065 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14066 {
14067 /* Claim moves to take one cycle, as the core can issue one load
14068 at a time and the next load can start a cycle later. */
14069 if (dep_insn_type == TYPE_IMOV
14070 || dep_insn_type == TYPE_FMOV)
14071 cost = 1;
14072 else if (cost > 1)
14073 cost--;
14074 }
14075 break;
14076
14077 case PROCESSOR_K6:
14078 memory = get_attr_memory (insn);
14079
14080 /* The esp dependency is resolved before the instruction is really
14081 finished. */
14082 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14083 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14084 return 1;
14085
14086 /* INT->FP conversion is expensive. */
14087 if (get_attr_fp_int_src (dep_insn))
14088 cost += 5;
14089
14090 /* Show the reorder buffer's ability to hide the latency of a load by
14091 executing it in parallel with the previous instruction, when the
14092 previous instruction is not needed to compute the address. */
14093 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14094 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14095 {
14096 /* Claim moves to take one cycle, as the core can issue one load
14097 at a time and the next load can start a cycle later. */
14098 if (dep_insn_type == TYPE_IMOV
14099 || dep_insn_type == TYPE_FMOV)
14100 cost = 1;
14101 else if (cost > 2)
14102 cost -= 2;
14103 else
14104 cost = 1;
14105 }
14106 break;
14107
14108 case PROCESSOR_ATHLON:
14109 case PROCESSOR_K8:
14110 case PROCESSOR_GENERIC32:
14111 case PROCESSOR_GENERIC64:
14112 memory = get_attr_memory (insn);
14113
14114 /* Show the reorder buffer's ability to hide the latency of a load by
14115 executing it in parallel with the previous instruction, when the
14116 previous instruction is not needed to compute the address. */
14117 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14118 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14119 {
14120 enum attr_unit unit = get_attr_unit (insn);
14121 int loadcost = 3;
14122
14123 /* Because of the difference between the length of integer and
14124 floating unit pipeline preparation stages, the memory operands
14125 for floating point are cheaper.
14126
14127 ??? For Athlon the difference is most probably 2. */
14128 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14129 loadcost = 3;
14130 else
14131 loadcost = TARGET_ATHLON ? 2 : 0;
14132
14133 if (cost >= loadcost)
14134 cost -= loadcost;
14135 else
14136 cost = 0;
14137 }
14138
14139 default:
14140 break;
14141 }
14142
14143 return cost;
14144 }
14145
14146 /* How many alternative schedules to try. This should be as wide as the
14147 scheduling freedom in the DFA, but no wider. Making this value too
14148 large results in extra work for the scheduler. */
14149
14150 static int
14151 ia32_multipass_dfa_lookahead (void)
14152 {
14153 if (ix86_tune == PROCESSOR_PENTIUM)
14154 return 2;
14155
14156 if (ix86_tune == PROCESSOR_PENTIUMPRO
14157 || ix86_tune == PROCESSOR_K6)
14158 return 1;
14159
14160 else
14161 return 0;
14162 }
14163
14164 \f
14165 /* Compute the alignment given to a constant that is being placed in memory.
14166 EXP is the constant and ALIGN is the alignment that the object would
14167 ordinarily have.
14168 The value of this function is used instead of that alignment to align
14169 the object. */
14170
14171 int
14172 ix86_constant_alignment (tree exp, int align)
14173 {
14174 if (TREE_CODE (exp) == REAL_CST)
14175 {
14176 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14177 return 64;
14178 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14179 return 128;
14180 }
14181 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14182 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14183 return BITS_PER_WORD;
14184
14185 return align;
14186 }
14187
14188 /* Compute the alignment for a static variable.
14189 TYPE is the data type, and ALIGN is the alignment that
14190 the object would ordinarily have. The value of this function is used
14191 instead of that alignment to align the object. */
14192
14193 int
14194 ix86_data_alignment (tree type, int align)
14195 {
14196 int max_align = optimize_size ? BITS_PER_WORD : 256;
14197
14198 if (AGGREGATE_TYPE_P (type)
14199 && TYPE_SIZE (type)
14200 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14201 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14202 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14203 && align < max_align)
14204 align = max_align;
14205
14206 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14207 to a 16-byte boundary. */
14208 if (TARGET_64BIT)
14209 {
14210 if (AGGREGATE_TYPE_P (type)
14211 && TYPE_SIZE (type)
14212 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14213 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14214 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14215 return 128;
14216 }
14217
14218 if (TREE_CODE (type) == ARRAY_TYPE)
14219 {
14220 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14221 return 64;
14222 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14223 return 128;
14224 }
14225 else if (TREE_CODE (type) == COMPLEX_TYPE)
14226 {
14227
14228 if (TYPE_MODE (type) == DCmode && align < 64)
14229 return 64;
14230 if (TYPE_MODE (type) == XCmode && align < 128)
14231 return 128;
14232 }
14233 else if ((TREE_CODE (type) == RECORD_TYPE
14234 || TREE_CODE (type) == UNION_TYPE
14235 || TREE_CODE (type) == QUAL_UNION_TYPE)
14236 && TYPE_FIELDS (type))
14237 {
14238 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14239 return 64;
14240 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14241 return 128;
14242 }
14243 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14244 || TREE_CODE (type) == INTEGER_TYPE)
14245 {
14246 if (TYPE_MODE (type) == DFmode && align < 64)
14247 return 64;
14248 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14249 return 128;
14250 }
14251
14252 return align;
14253 }
14254
14255 /* Compute the alignment for a local variable.
14256 TYPE is the data type, and ALIGN is the alignment that
14257 the object would ordinarily have. The value of this macro is used
14258 instead of that alignment to align the object. */
14259
14260 int
14261 ix86_local_alignment (tree type, int align)
14262 {
14263 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14264 to a 16-byte boundary. */
14265 if (TARGET_64BIT)
14266 {
14267 if (AGGREGATE_TYPE_P (type)
14268 && TYPE_SIZE (type)
14269 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14270 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14271 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14272 return 128;
14273 }
14274 if (TREE_CODE (type) == ARRAY_TYPE)
14275 {
14276 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14277 return 64;
14278 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14279 return 128;
14280 }
14281 else if (TREE_CODE (type) == COMPLEX_TYPE)
14282 {
14283 if (TYPE_MODE (type) == DCmode && align < 64)
14284 return 64;
14285 if (TYPE_MODE (type) == XCmode && align < 128)
14286 return 128;
14287 }
14288 else if ((TREE_CODE (type) == RECORD_TYPE
14289 || TREE_CODE (type) == UNION_TYPE
14290 || TREE_CODE (type) == QUAL_UNION_TYPE)
14291 && TYPE_FIELDS (type))
14292 {
14293 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14294 return 64;
14295 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14296 return 128;
14297 }
14298 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14299 || TREE_CODE (type) == INTEGER_TYPE)
14300 {
14301
14302 if (TYPE_MODE (type) == DFmode && align < 64)
14303 return 64;
14304 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14305 return 128;
14306 }
14307 return align;
14308 }
14309 \f
14310 /* Emit RTL insns to initialize the variable parts of a trampoline.
14311 FNADDR is an RTX for the address of the function's pure code.
14312 CXT is an RTX for the static chain value for the function. */
14313 void
14314 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14315 {
14316 if (!TARGET_64BIT)
14317 {
14318 /* Compute offset from the end of the jmp to the target function. */
14319 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14320 plus_constant (tramp, 10),
14321 NULL_RTX, 1, OPTAB_DIRECT);
14322 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14323 gen_int_mode (0xb9, QImode));
14324 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14325 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14326 gen_int_mode (0xe9, QImode));
14327 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14328 }
14329 else
14330 {
14331 int offset = 0;
14332 /* Try to load the address using the shorter movl instead of movabs.
14333 We may want to support movq for kernel mode, but the kernel does not use
14334 trampolines at the moment. */
14335 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14336 {
14337 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14338 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14339 gen_int_mode (0xbb41, HImode));
14340 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14341 gen_lowpart (SImode, fnaddr));
14342 offset += 6;
14343 }
14344 else
14345 {
14346 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14347 gen_int_mode (0xbb49, HImode));
14348 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14349 fnaddr);
14350 offset += 10;
14351 }
14352 /* Load static chain using movabs to r10. */
14353 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14354 gen_int_mode (0xba49, HImode));
14355 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14356 cxt);
14357 offset += 10;
14358 /* Jump to r11. */
14359 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14360 gen_int_mode (0xff49, HImode));
14361 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14362 gen_int_mode (0xe3, QImode));
14363 offset += 3;
14364 gcc_assert (offset <= TRAMPOLINE_SIZE);
14365 }
14366
14367 #ifdef ENABLE_EXECUTE_STACK
14368 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14369 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14370 #endif
14371 }
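/* Editorial sketch of the trampoline bytes built above (illustration only).
   32-bit:
	b9 <cxt:4>		movl   $CXT, %ecx
	e9 <disp:4>		jmp    FNADDR		(pc-relative)
   64-bit, when FNADDR fits in a zero-extended 32-bit immediate:
	41 bb <fnaddr:4>	movl   $FNADDR, %r11d
	49 ba <cxt:8>		movabs $CXT, %r10
	49 ff e3		jmp    *%r11
   otherwise the first instruction is 49 bb <fnaddr:8>, a movabs into %r11.  */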
14372 \f
14373 /* Codes for all the SSE/MMX builtins. */
14374 enum ix86_builtins
14375 {
14376 IX86_BUILTIN_ADDPS,
14377 IX86_BUILTIN_ADDSS,
14378 IX86_BUILTIN_DIVPS,
14379 IX86_BUILTIN_DIVSS,
14380 IX86_BUILTIN_MULPS,
14381 IX86_BUILTIN_MULSS,
14382 IX86_BUILTIN_SUBPS,
14383 IX86_BUILTIN_SUBSS,
14384
14385 IX86_BUILTIN_CMPEQPS,
14386 IX86_BUILTIN_CMPLTPS,
14387 IX86_BUILTIN_CMPLEPS,
14388 IX86_BUILTIN_CMPGTPS,
14389 IX86_BUILTIN_CMPGEPS,
14390 IX86_BUILTIN_CMPNEQPS,
14391 IX86_BUILTIN_CMPNLTPS,
14392 IX86_BUILTIN_CMPNLEPS,
14393 IX86_BUILTIN_CMPNGTPS,
14394 IX86_BUILTIN_CMPNGEPS,
14395 IX86_BUILTIN_CMPORDPS,
14396 IX86_BUILTIN_CMPUNORDPS,
14397 IX86_BUILTIN_CMPEQSS,
14398 IX86_BUILTIN_CMPLTSS,
14399 IX86_BUILTIN_CMPLESS,
14400 IX86_BUILTIN_CMPNEQSS,
14401 IX86_BUILTIN_CMPNLTSS,
14402 IX86_BUILTIN_CMPNLESS,
14403 IX86_BUILTIN_CMPNGTSS,
14404 IX86_BUILTIN_CMPNGESS,
14405 IX86_BUILTIN_CMPORDSS,
14406 IX86_BUILTIN_CMPUNORDSS,
14407
14408 IX86_BUILTIN_COMIEQSS,
14409 IX86_BUILTIN_COMILTSS,
14410 IX86_BUILTIN_COMILESS,
14411 IX86_BUILTIN_COMIGTSS,
14412 IX86_BUILTIN_COMIGESS,
14413 IX86_BUILTIN_COMINEQSS,
14414 IX86_BUILTIN_UCOMIEQSS,
14415 IX86_BUILTIN_UCOMILTSS,
14416 IX86_BUILTIN_UCOMILESS,
14417 IX86_BUILTIN_UCOMIGTSS,
14418 IX86_BUILTIN_UCOMIGESS,
14419 IX86_BUILTIN_UCOMINEQSS,
14420
14421 IX86_BUILTIN_CVTPI2PS,
14422 IX86_BUILTIN_CVTPS2PI,
14423 IX86_BUILTIN_CVTSI2SS,
14424 IX86_BUILTIN_CVTSI642SS,
14425 IX86_BUILTIN_CVTSS2SI,
14426 IX86_BUILTIN_CVTSS2SI64,
14427 IX86_BUILTIN_CVTTPS2PI,
14428 IX86_BUILTIN_CVTTSS2SI,
14429 IX86_BUILTIN_CVTTSS2SI64,
14430
14431 IX86_BUILTIN_MAXPS,
14432 IX86_BUILTIN_MAXSS,
14433 IX86_BUILTIN_MINPS,
14434 IX86_BUILTIN_MINSS,
14435
14436 IX86_BUILTIN_LOADUPS,
14437 IX86_BUILTIN_STOREUPS,
14438 IX86_BUILTIN_MOVSS,
14439
14440 IX86_BUILTIN_MOVHLPS,
14441 IX86_BUILTIN_MOVLHPS,
14442 IX86_BUILTIN_LOADHPS,
14443 IX86_BUILTIN_LOADLPS,
14444 IX86_BUILTIN_STOREHPS,
14445 IX86_BUILTIN_STORELPS,
14446
14447 IX86_BUILTIN_MASKMOVQ,
14448 IX86_BUILTIN_MOVMSKPS,
14449 IX86_BUILTIN_PMOVMSKB,
14450
14451 IX86_BUILTIN_MOVNTPS,
14452 IX86_BUILTIN_MOVNTQ,
14453
14454 IX86_BUILTIN_LOADDQU,
14455 IX86_BUILTIN_STOREDQU,
14456
14457 IX86_BUILTIN_PACKSSWB,
14458 IX86_BUILTIN_PACKSSDW,
14459 IX86_BUILTIN_PACKUSWB,
14460
14461 IX86_BUILTIN_PADDB,
14462 IX86_BUILTIN_PADDW,
14463 IX86_BUILTIN_PADDD,
14464 IX86_BUILTIN_PADDQ,
14465 IX86_BUILTIN_PADDSB,
14466 IX86_BUILTIN_PADDSW,
14467 IX86_BUILTIN_PADDUSB,
14468 IX86_BUILTIN_PADDUSW,
14469 IX86_BUILTIN_PSUBB,
14470 IX86_BUILTIN_PSUBW,
14471 IX86_BUILTIN_PSUBD,
14472 IX86_BUILTIN_PSUBQ,
14473 IX86_BUILTIN_PSUBSB,
14474 IX86_BUILTIN_PSUBSW,
14475 IX86_BUILTIN_PSUBUSB,
14476 IX86_BUILTIN_PSUBUSW,
14477
14478 IX86_BUILTIN_PAND,
14479 IX86_BUILTIN_PANDN,
14480 IX86_BUILTIN_POR,
14481 IX86_BUILTIN_PXOR,
14482
14483 IX86_BUILTIN_PAVGB,
14484 IX86_BUILTIN_PAVGW,
14485
14486 IX86_BUILTIN_PCMPEQB,
14487 IX86_BUILTIN_PCMPEQW,
14488 IX86_BUILTIN_PCMPEQD,
14489 IX86_BUILTIN_PCMPGTB,
14490 IX86_BUILTIN_PCMPGTW,
14491 IX86_BUILTIN_PCMPGTD,
14492
14493 IX86_BUILTIN_PMADDWD,
14494
14495 IX86_BUILTIN_PMAXSW,
14496 IX86_BUILTIN_PMAXUB,
14497 IX86_BUILTIN_PMINSW,
14498 IX86_BUILTIN_PMINUB,
14499
14500 IX86_BUILTIN_PMULHUW,
14501 IX86_BUILTIN_PMULHW,
14502 IX86_BUILTIN_PMULLW,
14503
14504 IX86_BUILTIN_PSADBW,
14505 IX86_BUILTIN_PSHUFW,
14506
14507 IX86_BUILTIN_PSLLW,
14508 IX86_BUILTIN_PSLLD,
14509 IX86_BUILTIN_PSLLQ,
14510 IX86_BUILTIN_PSRAW,
14511 IX86_BUILTIN_PSRAD,
14512 IX86_BUILTIN_PSRLW,
14513 IX86_BUILTIN_PSRLD,
14514 IX86_BUILTIN_PSRLQ,
14515 IX86_BUILTIN_PSLLWI,
14516 IX86_BUILTIN_PSLLDI,
14517 IX86_BUILTIN_PSLLQI,
14518 IX86_BUILTIN_PSRAWI,
14519 IX86_BUILTIN_PSRADI,
14520 IX86_BUILTIN_PSRLWI,
14521 IX86_BUILTIN_PSRLDI,
14522 IX86_BUILTIN_PSRLQI,
14523
14524 IX86_BUILTIN_PUNPCKHBW,
14525 IX86_BUILTIN_PUNPCKHWD,
14526 IX86_BUILTIN_PUNPCKHDQ,
14527 IX86_BUILTIN_PUNPCKLBW,
14528 IX86_BUILTIN_PUNPCKLWD,
14529 IX86_BUILTIN_PUNPCKLDQ,
14530
14531 IX86_BUILTIN_SHUFPS,
14532
14533 IX86_BUILTIN_RCPPS,
14534 IX86_BUILTIN_RCPSS,
14535 IX86_BUILTIN_RSQRTPS,
14536 IX86_BUILTIN_RSQRTSS,
14537 IX86_BUILTIN_SQRTPS,
14538 IX86_BUILTIN_SQRTSS,
14539
14540 IX86_BUILTIN_UNPCKHPS,
14541 IX86_BUILTIN_UNPCKLPS,
14542
14543 IX86_BUILTIN_ANDPS,
14544 IX86_BUILTIN_ANDNPS,
14545 IX86_BUILTIN_ORPS,
14546 IX86_BUILTIN_XORPS,
14547
14548 IX86_BUILTIN_EMMS,
14549 IX86_BUILTIN_LDMXCSR,
14550 IX86_BUILTIN_STMXCSR,
14551 IX86_BUILTIN_SFENCE,
14552
14553 /* 3DNow! Original */
14554 IX86_BUILTIN_FEMMS,
14555 IX86_BUILTIN_PAVGUSB,
14556 IX86_BUILTIN_PF2ID,
14557 IX86_BUILTIN_PFACC,
14558 IX86_BUILTIN_PFADD,
14559 IX86_BUILTIN_PFCMPEQ,
14560 IX86_BUILTIN_PFCMPGE,
14561 IX86_BUILTIN_PFCMPGT,
14562 IX86_BUILTIN_PFMAX,
14563 IX86_BUILTIN_PFMIN,
14564 IX86_BUILTIN_PFMUL,
14565 IX86_BUILTIN_PFRCP,
14566 IX86_BUILTIN_PFRCPIT1,
14567 IX86_BUILTIN_PFRCPIT2,
14568 IX86_BUILTIN_PFRSQIT1,
14569 IX86_BUILTIN_PFRSQRT,
14570 IX86_BUILTIN_PFSUB,
14571 IX86_BUILTIN_PFSUBR,
14572 IX86_BUILTIN_PI2FD,
14573 IX86_BUILTIN_PMULHRW,
14574
14575 /* 3DNow! Athlon Extensions */
14576 IX86_BUILTIN_PF2IW,
14577 IX86_BUILTIN_PFNACC,
14578 IX86_BUILTIN_PFPNACC,
14579 IX86_BUILTIN_PI2FW,
14580 IX86_BUILTIN_PSWAPDSI,
14581 IX86_BUILTIN_PSWAPDSF,
14582
14583 /* SSE2 */
14584 IX86_BUILTIN_ADDPD,
14585 IX86_BUILTIN_ADDSD,
14586 IX86_BUILTIN_DIVPD,
14587 IX86_BUILTIN_DIVSD,
14588 IX86_BUILTIN_MULPD,
14589 IX86_BUILTIN_MULSD,
14590 IX86_BUILTIN_SUBPD,
14591 IX86_BUILTIN_SUBSD,
14592
14593 IX86_BUILTIN_CMPEQPD,
14594 IX86_BUILTIN_CMPLTPD,
14595 IX86_BUILTIN_CMPLEPD,
14596 IX86_BUILTIN_CMPGTPD,
14597 IX86_BUILTIN_CMPGEPD,
14598 IX86_BUILTIN_CMPNEQPD,
14599 IX86_BUILTIN_CMPNLTPD,
14600 IX86_BUILTIN_CMPNLEPD,
14601 IX86_BUILTIN_CMPNGTPD,
14602 IX86_BUILTIN_CMPNGEPD,
14603 IX86_BUILTIN_CMPORDPD,
14604 IX86_BUILTIN_CMPUNORDPD,
14605 IX86_BUILTIN_CMPNEPD,
14606 IX86_BUILTIN_CMPEQSD,
14607 IX86_BUILTIN_CMPLTSD,
14608 IX86_BUILTIN_CMPLESD,
14609 IX86_BUILTIN_CMPNEQSD,
14610 IX86_BUILTIN_CMPNLTSD,
14611 IX86_BUILTIN_CMPNLESD,
14612 IX86_BUILTIN_CMPORDSD,
14613 IX86_BUILTIN_CMPUNORDSD,
14614 IX86_BUILTIN_CMPNESD,
14615
14616 IX86_BUILTIN_COMIEQSD,
14617 IX86_BUILTIN_COMILTSD,
14618 IX86_BUILTIN_COMILESD,
14619 IX86_BUILTIN_COMIGTSD,
14620 IX86_BUILTIN_COMIGESD,
14621 IX86_BUILTIN_COMINEQSD,
14622 IX86_BUILTIN_UCOMIEQSD,
14623 IX86_BUILTIN_UCOMILTSD,
14624 IX86_BUILTIN_UCOMILESD,
14625 IX86_BUILTIN_UCOMIGTSD,
14626 IX86_BUILTIN_UCOMIGESD,
14627 IX86_BUILTIN_UCOMINEQSD,
14628
14629 IX86_BUILTIN_MAXPD,
14630 IX86_BUILTIN_MAXSD,
14631 IX86_BUILTIN_MINPD,
14632 IX86_BUILTIN_MINSD,
14633
14634 IX86_BUILTIN_ANDPD,
14635 IX86_BUILTIN_ANDNPD,
14636 IX86_BUILTIN_ORPD,
14637 IX86_BUILTIN_XORPD,
14638
14639 IX86_BUILTIN_SQRTPD,
14640 IX86_BUILTIN_SQRTSD,
14641
14642 IX86_BUILTIN_UNPCKHPD,
14643 IX86_BUILTIN_UNPCKLPD,
14644
14645 IX86_BUILTIN_SHUFPD,
14646
14647 IX86_BUILTIN_LOADUPD,
14648 IX86_BUILTIN_STOREUPD,
14649 IX86_BUILTIN_MOVSD,
14650
14651 IX86_BUILTIN_LOADHPD,
14652 IX86_BUILTIN_LOADLPD,
14653
14654 IX86_BUILTIN_CVTDQ2PD,
14655 IX86_BUILTIN_CVTDQ2PS,
14656
14657 IX86_BUILTIN_CVTPD2DQ,
14658 IX86_BUILTIN_CVTPD2PI,
14659 IX86_BUILTIN_CVTPD2PS,
14660 IX86_BUILTIN_CVTTPD2DQ,
14661 IX86_BUILTIN_CVTTPD2PI,
14662
14663 IX86_BUILTIN_CVTPI2PD,
14664 IX86_BUILTIN_CVTSI2SD,
14665 IX86_BUILTIN_CVTSI642SD,
14666
14667 IX86_BUILTIN_CVTSD2SI,
14668 IX86_BUILTIN_CVTSD2SI64,
14669 IX86_BUILTIN_CVTSD2SS,
14670 IX86_BUILTIN_CVTSS2SD,
14671 IX86_BUILTIN_CVTTSD2SI,
14672 IX86_BUILTIN_CVTTSD2SI64,
14673
14674 IX86_BUILTIN_CVTPS2DQ,
14675 IX86_BUILTIN_CVTPS2PD,
14676 IX86_BUILTIN_CVTTPS2DQ,
14677
14678 IX86_BUILTIN_MOVNTI,
14679 IX86_BUILTIN_MOVNTPD,
14680 IX86_BUILTIN_MOVNTDQ,
14681
14682 /* SSE2 MMX */
14683 IX86_BUILTIN_MASKMOVDQU,
14684 IX86_BUILTIN_MOVMSKPD,
14685 IX86_BUILTIN_PMOVMSKB128,
14686
14687 IX86_BUILTIN_PACKSSWB128,
14688 IX86_BUILTIN_PACKSSDW128,
14689 IX86_BUILTIN_PACKUSWB128,
14690
14691 IX86_BUILTIN_PADDB128,
14692 IX86_BUILTIN_PADDW128,
14693 IX86_BUILTIN_PADDD128,
14694 IX86_BUILTIN_PADDQ128,
14695 IX86_BUILTIN_PADDSB128,
14696 IX86_BUILTIN_PADDSW128,
14697 IX86_BUILTIN_PADDUSB128,
14698 IX86_BUILTIN_PADDUSW128,
14699 IX86_BUILTIN_PSUBB128,
14700 IX86_BUILTIN_PSUBW128,
14701 IX86_BUILTIN_PSUBD128,
14702 IX86_BUILTIN_PSUBQ128,
14703 IX86_BUILTIN_PSUBSB128,
14704 IX86_BUILTIN_PSUBSW128,
14705 IX86_BUILTIN_PSUBUSB128,
14706 IX86_BUILTIN_PSUBUSW128,
14707
14708 IX86_BUILTIN_PAND128,
14709 IX86_BUILTIN_PANDN128,
14710 IX86_BUILTIN_POR128,
14711 IX86_BUILTIN_PXOR128,
14712
14713 IX86_BUILTIN_PAVGB128,
14714 IX86_BUILTIN_PAVGW128,
14715
14716 IX86_BUILTIN_PCMPEQB128,
14717 IX86_BUILTIN_PCMPEQW128,
14718 IX86_BUILTIN_PCMPEQD128,
14719 IX86_BUILTIN_PCMPGTB128,
14720 IX86_BUILTIN_PCMPGTW128,
14721 IX86_BUILTIN_PCMPGTD128,
14722
14723 IX86_BUILTIN_PMADDWD128,
14724
14725 IX86_BUILTIN_PMAXSW128,
14726 IX86_BUILTIN_PMAXUB128,
14727 IX86_BUILTIN_PMINSW128,
14728 IX86_BUILTIN_PMINUB128,
14729
14730 IX86_BUILTIN_PMULUDQ,
14731 IX86_BUILTIN_PMULUDQ128,
14732 IX86_BUILTIN_PMULHUW128,
14733 IX86_BUILTIN_PMULHW128,
14734 IX86_BUILTIN_PMULLW128,
14735
14736 IX86_BUILTIN_PSADBW128,
14737 IX86_BUILTIN_PSHUFHW,
14738 IX86_BUILTIN_PSHUFLW,
14739 IX86_BUILTIN_PSHUFD,
14740
14741 IX86_BUILTIN_PSLLW128,
14742 IX86_BUILTIN_PSLLD128,
14743 IX86_BUILTIN_PSLLQ128,
14744 IX86_BUILTIN_PSRAW128,
14745 IX86_BUILTIN_PSRAD128,
14746 IX86_BUILTIN_PSRLW128,
14747 IX86_BUILTIN_PSRLD128,
14748 IX86_BUILTIN_PSRLQ128,
14749 IX86_BUILTIN_PSLLDQI128,
14750 IX86_BUILTIN_PSLLWI128,
14751 IX86_BUILTIN_PSLLDI128,
14752 IX86_BUILTIN_PSLLQI128,
14753 IX86_BUILTIN_PSRAWI128,
14754 IX86_BUILTIN_PSRADI128,
14755 IX86_BUILTIN_PSRLDQI128,
14756 IX86_BUILTIN_PSRLWI128,
14757 IX86_BUILTIN_PSRLDI128,
14758 IX86_BUILTIN_PSRLQI128,
14759
14760 IX86_BUILTIN_PUNPCKHBW128,
14761 IX86_BUILTIN_PUNPCKHWD128,
14762 IX86_BUILTIN_PUNPCKHDQ128,
14763 IX86_BUILTIN_PUNPCKHQDQ128,
14764 IX86_BUILTIN_PUNPCKLBW128,
14765 IX86_BUILTIN_PUNPCKLWD128,
14766 IX86_BUILTIN_PUNPCKLDQ128,
14767 IX86_BUILTIN_PUNPCKLQDQ128,
14768
14769 IX86_BUILTIN_CLFLUSH,
14770 IX86_BUILTIN_MFENCE,
14771 IX86_BUILTIN_LFENCE,
14772
14773 /* SSE3 (Prescott New Instructions).  */
14774 IX86_BUILTIN_ADDSUBPS,
14775 IX86_BUILTIN_HADDPS,
14776 IX86_BUILTIN_HSUBPS,
14777 IX86_BUILTIN_MOVSHDUP,
14778 IX86_BUILTIN_MOVSLDUP,
14779 IX86_BUILTIN_ADDSUBPD,
14780 IX86_BUILTIN_HADDPD,
14781 IX86_BUILTIN_HSUBPD,
14782 IX86_BUILTIN_LDDQU,
14783
14784 IX86_BUILTIN_MONITOR,
14785 IX86_BUILTIN_MWAIT,
14786
14787 /* SSSE3. */
14788 IX86_BUILTIN_PHADDW,
14789 IX86_BUILTIN_PHADDD,
14790 IX86_BUILTIN_PHADDSW,
14791 IX86_BUILTIN_PHSUBW,
14792 IX86_BUILTIN_PHSUBD,
14793 IX86_BUILTIN_PHSUBSW,
14794 IX86_BUILTIN_PMADDUBSW,
14795 IX86_BUILTIN_PMULHRSW,
14796 IX86_BUILTIN_PSHUFB,
14797 IX86_BUILTIN_PSIGNB,
14798 IX86_BUILTIN_PSIGNW,
14799 IX86_BUILTIN_PSIGND,
14800 IX86_BUILTIN_PALIGNR,
14801 IX86_BUILTIN_PABSB,
14802 IX86_BUILTIN_PABSW,
14803 IX86_BUILTIN_PABSD,
14804
14805 IX86_BUILTIN_PHADDW128,
14806 IX86_BUILTIN_PHADDD128,
14807 IX86_BUILTIN_PHADDSW128,
14808 IX86_BUILTIN_PHSUBW128,
14809 IX86_BUILTIN_PHSUBD128,
14810 IX86_BUILTIN_PHSUBSW128,
14811 IX86_BUILTIN_PMADDUBSW128,
14812 IX86_BUILTIN_PMULHRSW128,
14813 IX86_BUILTIN_PSHUFB128,
14814 IX86_BUILTIN_PSIGNB128,
14815 IX86_BUILTIN_PSIGNW128,
14816 IX86_BUILTIN_PSIGND128,
14817 IX86_BUILTIN_PALIGNR128,
14818 IX86_BUILTIN_PABSB128,
14819 IX86_BUILTIN_PABSW128,
14820 IX86_BUILTIN_PABSD128,
14821
14822 IX86_BUILTIN_VEC_INIT_V2SI,
14823 IX86_BUILTIN_VEC_INIT_V4HI,
14824 IX86_BUILTIN_VEC_INIT_V8QI,
14825 IX86_BUILTIN_VEC_EXT_V2DF,
14826 IX86_BUILTIN_VEC_EXT_V2DI,
14827 IX86_BUILTIN_VEC_EXT_V4SF,
14828 IX86_BUILTIN_VEC_EXT_V4SI,
14829 IX86_BUILTIN_VEC_EXT_V8HI,
14830 IX86_BUILTIN_VEC_EXT_V2SI,
14831 IX86_BUILTIN_VEC_EXT_V4HI,
14832 IX86_BUILTIN_VEC_SET_V8HI,
14833 IX86_BUILTIN_VEC_SET_V4HI,
14834
14835 IX86_BUILTIN_MAX
14836 };
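/* Editor's note: each enumerator above doubles as the function_code passed
   to add_builtin_function and as the index into the ix86_builtins array
   declared below; IX86_BUILTIN_MAX sizes that array.  */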
14837
14838 /* Table for the ix86 builtin decls. */
14839 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
14840
14841 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
14842 * only if target_flags includes one of the bits in MASK.  Stores the
14843 * function decl in the ix86_builtins array.
14844 * Returns the function decl, or NULL_TREE if the builtin was not added.  */
14845
14846 static inline tree
14847 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
14848 {
14849 tree decl = NULL_TREE;
14850
14851 if (mask & target_flags
14852 && (!(mask & MASK_64BIT) || TARGET_64BIT))
14853 {
14854 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
14855 NULL, NULL_TREE);
14856 ix86_builtins[(int) code] = decl;
14857 }
14858
14859 return decl;
14860 }
14861
14862 /* Like def_builtin, but also marks the function decl "const". */
14863
14864 static inline tree
14865 def_builtin_const (int mask, const char *name, tree type,
14866 enum ix86_builtins code)
14867 {
14868 tree decl = def_builtin (mask, name, type, code);
14869 if (decl)
14870 TREE_READONLY (decl) = 1;
14871 return decl;
14872 }
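/* Editor's sketch (kept under "#if 0", never compiled): a minimal,
   hypothetical use of the two helpers above.  The real registrations are
   done table-driven in ix86_init_mmx_sse_builtins below.  */
#if 0
static void
example_register_addps (void)
{
  tree v4sf_type = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree ftype = build_function_type_list (v4sf_type, v4sf_type, v4sf_type,
                                         NULL_TREE);

  /* The decl is created and stored in ix86_builtins[] only when MASK_SSE
     is present in target_flags; otherwise NULL_TREE is returned.  */
  def_builtin_const (MASK_SSE, "__builtin_ia32_addps", ftype,
                     IX86_BUILTIN_ADDPS);
}
#endif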
14873
14874 /* Bits for builtin_description.flag. */
14875
14876 /* Set when we don't support the comparison natively, and should
14877 swap the comparison operands in order to support it.  */
14878 #define BUILTIN_DESC_SWAP_OPERANDS 1
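/* For example, there is no native "greater than" packed compare, so the
   __builtin_ia32_cmpgtps entry below uses LT together with this flag, so
   that the compare can be emitted as LT with the operands swapped.  */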
14879
14880 struct builtin_description
14881 {
14882 const unsigned int mask;
14883 const enum insn_code icode;
14884 const char *const name;
14885 const enum ix86_builtins code;
14886 const enum rtx_code comparison;
14887 const unsigned int flag;
14888 };
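/* Editor's note: a typical entry in the tables below, e.g.

     { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
       IX86_BUILTIN_ADDPS, 0, 0 },

   reads: when MASK_SSE is enabled, register "__builtin_ia32_addps" under
   code IX86_BUILTIN_ADDPS and expand it with insn pattern addv4sf3; the
   comparison and flag fields are only used by the compare entries.  */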
14889
14890 static const struct builtin_description bdesc_comi[] =
14891 {
14892 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14893 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14894 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14895 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14896 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14897 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14898 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14899 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14900 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14901 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14902 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14903 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14904 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14905 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14906 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14907 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14908 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14909 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14910 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14911 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14912 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14913 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14914 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14915 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14916 };
14917
14918 static const struct builtin_description bdesc_2arg[] =
14919 {
14920 /* SSE */
14921 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14922 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14923 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14924 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14925 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14926 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14927 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14928 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14929
14930 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14931 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14932 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14933 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14934 BUILTIN_DESC_SWAP_OPERANDS },
14935 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14936 BUILTIN_DESC_SWAP_OPERANDS },
14937 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14938 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14939 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14940 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14941 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14942 BUILTIN_DESC_SWAP_OPERANDS },
14943 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14944 BUILTIN_DESC_SWAP_OPERANDS },
14945 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14946 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14947 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14948 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14949 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14950 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14951 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14952 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14953 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14954 BUILTIN_DESC_SWAP_OPERANDS },
14955 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14956 BUILTIN_DESC_SWAP_OPERANDS },
14957 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14958
14959 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14960 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14961 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14962 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14963
14964 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14965 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14966 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14967 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14968
14969 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14970 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14971 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14972 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14973 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14974
14975 /* MMX */
14976 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14977 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14978 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14979 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14980 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14981 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14982 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14983 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14984
14985 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14986 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14987 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14988 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14989 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14990 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14991 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14992 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14993
14994 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14995 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14996 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14997
14998 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14999 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15000 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15001 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15002
15003 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15004 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15005
15006 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15007 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15008 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15009 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15010 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15011 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15012
15013 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15014 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15015 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15016 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15017
15018 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15019 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15020 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15021 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15022 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15023 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15024
15025 /* Special. */
15026 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15027 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15028 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15029
15030 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15031 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15032 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15033
15034 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15035 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15036 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15037 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15038 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15039 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15040
15041 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15042 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15043 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15044 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15045 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15046 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15047
15048 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15049 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15050 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15051 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15052
15053 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15054 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15055
15056 /* SSE2 */
15057 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15058 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15059 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15060 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15061 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15062 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15063 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15064 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15065
15066 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15067 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15068 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15069 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15070 BUILTIN_DESC_SWAP_OPERANDS },
15071 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15072 BUILTIN_DESC_SWAP_OPERANDS },
15073 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15074 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15075 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15076 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15077 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15078 BUILTIN_DESC_SWAP_OPERANDS },
15079 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15080 BUILTIN_DESC_SWAP_OPERANDS },
15081 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15082 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15083 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15084 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15085 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15086 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15087 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15088 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15089 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15090
15091 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15092 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15093 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15094 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15095
15096 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15097 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15098 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15099 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15100
15101 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15102 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15103 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15104
15105 /* SSE2 MMX */
15106 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15107 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15108 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15109 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15110 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15111 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15112 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15113 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15114
15115 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15116 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15117 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15118 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15119 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15120 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15121 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15122 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15123
15124 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15125 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15126
15127 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15128 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15129 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15130 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15131
15132 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15133 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15134
15135 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15136 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15137 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15138 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15139 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15140 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15141
15142 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15143 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15144 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15145 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15146
15147 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15148 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15149 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15150 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15151 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15152 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15153 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15154 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15155
15156 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15157 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15158 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15159
15160 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15161 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15162
15163 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15164 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15165
15166 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15167 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15168 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15169
15170 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15171 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15172 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15173
15174 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15175 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15176
15177 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15178
15179 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15180 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15181 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15182 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15183
15184 /* SSE3 */
15185 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15186 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15187 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15188 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15189 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15190 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15191
15192 /* SSSE3 */
15193 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15194 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15195 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15196 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15197 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15198 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15199 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15200 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15201 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15202 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15203 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15204 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15205 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15206 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15207 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15208 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15209 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15210 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15211 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15212 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15213 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15214 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15215 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15216 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15217 };
15218
15219 static const struct builtin_description bdesc_1arg[] =
15220 {
15221 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15222 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15223
15224 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15225 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15226 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15227
15228 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15229 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15230 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15231 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15232 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15233 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15234
15235 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15236 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15237
15238 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15239
15240 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15241 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15242
15243 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15244 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15245 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15246 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15247 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15248
15249 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15250
15251 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15252 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15253 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15254 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15255
15256 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15257 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15258 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15259
15260 /* SSE3 */
15261 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15262 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15263
15264 /* SSSE3 */
15265 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15266 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15267 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15268 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15269 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15270 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15271 };
15272
15273 static void
15274 ix86_init_builtins (void)
15275 {
15276 if (TARGET_MMX)
15277 ix86_init_mmx_sse_builtins ();
15278 }
15279
15280 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
15281 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
15282 builtins are defined.  */
15283 static void
15284 ix86_init_mmx_sse_builtins (void)
15285 {
15286 const struct builtin_description * d;
15287 size_t i;
15288
15289 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15290 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15291 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15292 tree V2DI_type_node
15293 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15294 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15295 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15296 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15297 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15298 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15299 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15300
15301 tree pchar_type_node = build_pointer_type (char_type_node);
15302 tree pcchar_type_node = build_pointer_type (
15303 build_type_variant (char_type_node, 1, 0));
15304 tree pfloat_type_node = build_pointer_type (float_type_node);
15305 tree pcfloat_type_node = build_pointer_type (
15306 build_type_variant (float_type_node, 1, 0));
15307 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15308 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15309 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15310
15311 /* Comparisons. */
15312 tree int_ftype_v4sf_v4sf
15313 = build_function_type_list (integer_type_node,
15314 V4SF_type_node, V4SF_type_node, NULL_TREE);
15315 tree v4si_ftype_v4sf_v4sf
15316 = build_function_type_list (V4SI_type_node,
15317 V4SF_type_node, V4SF_type_node, NULL_TREE);
15318 /* MMX/SSE/integer conversions. */
15319 tree int_ftype_v4sf
15320 = build_function_type_list (integer_type_node,
15321 V4SF_type_node, NULL_TREE);
15322 tree int64_ftype_v4sf
15323 = build_function_type_list (long_long_integer_type_node,
15324 V4SF_type_node, NULL_TREE);
15325 tree int_ftype_v8qi
15326 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15327 tree v4sf_ftype_v4sf_int
15328 = build_function_type_list (V4SF_type_node,
15329 V4SF_type_node, integer_type_node, NULL_TREE);
15330 tree v4sf_ftype_v4sf_int64
15331 = build_function_type_list (V4SF_type_node,
15332 V4SF_type_node, long_long_integer_type_node,
15333 NULL_TREE);
15334 tree v4sf_ftype_v4sf_v2si
15335 = build_function_type_list (V4SF_type_node,
15336 V4SF_type_node, V2SI_type_node, NULL_TREE);
15337
15338 /* Miscellaneous. */
15339 tree v8qi_ftype_v4hi_v4hi
15340 = build_function_type_list (V8QI_type_node,
15341 V4HI_type_node, V4HI_type_node, NULL_TREE);
15342 tree v4hi_ftype_v2si_v2si
15343 = build_function_type_list (V4HI_type_node,
15344 V2SI_type_node, V2SI_type_node, NULL_TREE);
15345 tree v4sf_ftype_v4sf_v4sf_int
15346 = build_function_type_list (V4SF_type_node,
15347 V4SF_type_node, V4SF_type_node,
15348 integer_type_node, NULL_TREE);
15349 tree v2si_ftype_v4hi_v4hi
15350 = build_function_type_list (V2SI_type_node,
15351 V4HI_type_node, V4HI_type_node, NULL_TREE);
15352 tree v4hi_ftype_v4hi_int
15353 = build_function_type_list (V4HI_type_node,
15354 V4HI_type_node, integer_type_node, NULL_TREE);
15355 tree v4hi_ftype_v4hi_di
15356 = build_function_type_list (V4HI_type_node,
15357 V4HI_type_node, long_long_unsigned_type_node,
15358 NULL_TREE);
15359 tree v2si_ftype_v2si_di
15360 = build_function_type_list (V2SI_type_node,
15361 V2SI_type_node, long_long_unsigned_type_node,
15362 NULL_TREE);
15363 tree void_ftype_void
15364 = build_function_type (void_type_node, void_list_node);
15365 tree void_ftype_unsigned
15366 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15367 tree void_ftype_unsigned_unsigned
15368 = build_function_type_list (void_type_node, unsigned_type_node,
15369 unsigned_type_node, NULL_TREE);
15370 tree void_ftype_pcvoid_unsigned_unsigned
15371 = build_function_type_list (void_type_node, const_ptr_type_node,
15372 unsigned_type_node, unsigned_type_node,
15373 NULL_TREE);
15374 tree unsigned_ftype_void
15375 = build_function_type (unsigned_type_node, void_list_node);
15376 tree v2si_ftype_v4sf
15377 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15378 /* Loads/stores. */
15379 tree void_ftype_v8qi_v8qi_pchar
15380 = build_function_type_list (void_type_node,
15381 V8QI_type_node, V8QI_type_node,
15382 pchar_type_node, NULL_TREE);
15383 tree v4sf_ftype_pcfloat
15384 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15385 /* @@@ the type is bogus */
15386 tree v4sf_ftype_v4sf_pv2si
15387 = build_function_type_list (V4SF_type_node,
15388 V4SF_type_node, pv2si_type_node, NULL_TREE);
15389 tree void_ftype_pv2si_v4sf
15390 = build_function_type_list (void_type_node,
15391 pv2si_type_node, V4SF_type_node, NULL_TREE);
15392 tree void_ftype_pfloat_v4sf
15393 = build_function_type_list (void_type_node,
15394 pfloat_type_node, V4SF_type_node, NULL_TREE);
15395 tree void_ftype_pdi_di
15396 = build_function_type_list (void_type_node,
15397 pdi_type_node, long_long_unsigned_type_node,
15398 NULL_TREE);
15399 tree void_ftype_pv2di_v2di
15400 = build_function_type_list (void_type_node,
15401 pv2di_type_node, V2DI_type_node, NULL_TREE);
15402 /* Normal vector unops. */
15403 tree v4sf_ftype_v4sf
15404 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15405 tree v16qi_ftype_v16qi
15406 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15407 tree v8hi_ftype_v8hi
15408 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15409 tree v4si_ftype_v4si
15410 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15411 tree v8qi_ftype_v8qi
15412 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15413 tree v4hi_ftype_v4hi
15414 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15415
15416 /* Normal vector binops. */
15417 tree v4sf_ftype_v4sf_v4sf
15418 = build_function_type_list (V4SF_type_node,
15419 V4SF_type_node, V4SF_type_node, NULL_TREE);
15420 tree v8qi_ftype_v8qi_v8qi
15421 = build_function_type_list (V8QI_type_node,
15422 V8QI_type_node, V8QI_type_node, NULL_TREE);
15423 tree v4hi_ftype_v4hi_v4hi
15424 = build_function_type_list (V4HI_type_node,
15425 V4HI_type_node, V4HI_type_node, NULL_TREE);
15426 tree v2si_ftype_v2si_v2si
15427 = build_function_type_list (V2SI_type_node,
15428 V2SI_type_node, V2SI_type_node, NULL_TREE);
15429 tree di_ftype_di_di
15430 = build_function_type_list (long_long_unsigned_type_node,
15431 long_long_unsigned_type_node,
15432 long_long_unsigned_type_node, NULL_TREE);
15433
15434 tree di_ftype_di_di_int
15435 = build_function_type_list (long_long_unsigned_type_node,
15436 long_long_unsigned_type_node,
15437 long_long_unsigned_type_node,
15438 integer_type_node, NULL_TREE);
15439
15440 tree v2si_ftype_v2sf
15441 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15442 tree v2sf_ftype_v2si
15443 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15444 tree v2si_ftype_v2si
15445 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15446 tree v2sf_ftype_v2sf
15447 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15448 tree v2sf_ftype_v2sf_v2sf
15449 = build_function_type_list (V2SF_type_node,
15450 V2SF_type_node, V2SF_type_node, NULL_TREE);
15451 tree v2si_ftype_v2sf_v2sf
15452 = build_function_type_list (V2SI_type_node,
15453 V2SF_type_node, V2SF_type_node, NULL_TREE);
15454 tree pint_type_node = build_pointer_type (integer_type_node);
15455 tree pdouble_type_node = build_pointer_type (double_type_node);
15456 tree pcdouble_type_node = build_pointer_type (
15457 build_type_variant (double_type_node, 1, 0));
15458 tree int_ftype_v2df_v2df
15459 = build_function_type_list (integer_type_node,
15460 V2DF_type_node, V2DF_type_node, NULL_TREE);
15461
15462 tree void_ftype_pcvoid
15463 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15464 tree v4sf_ftype_v4si
15465 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15466 tree v4si_ftype_v4sf
15467 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15468 tree v2df_ftype_v4si
15469 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15470 tree v4si_ftype_v2df
15471 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15472 tree v2si_ftype_v2df
15473 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15474 tree v4sf_ftype_v2df
15475 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15476 tree v2df_ftype_v2si
15477 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15478 tree v2df_ftype_v4sf
15479 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15480 tree int_ftype_v2df
15481 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15482 tree int64_ftype_v2df
15483 = build_function_type_list (long_long_integer_type_node,
15484 V2DF_type_node, NULL_TREE);
15485 tree v2df_ftype_v2df_int
15486 = build_function_type_list (V2DF_type_node,
15487 V2DF_type_node, integer_type_node, NULL_TREE);
15488 tree v2df_ftype_v2df_int64
15489 = build_function_type_list (V2DF_type_node,
15490 V2DF_type_node, long_long_integer_type_node,
15491 NULL_TREE);
15492 tree v4sf_ftype_v4sf_v2df
15493 = build_function_type_list (V4SF_type_node,
15494 V4SF_type_node, V2DF_type_node, NULL_TREE);
15495 tree v2df_ftype_v2df_v4sf
15496 = build_function_type_list (V2DF_type_node,
15497 V2DF_type_node, V4SF_type_node, NULL_TREE);
15498 tree v2df_ftype_v2df_v2df_int
15499 = build_function_type_list (V2DF_type_node,
15500 V2DF_type_node, V2DF_type_node,
15501 integer_type_node,
15502 NULL_TREE);
15503 tree v2df_ftype_v2df_pcdouble
15504 = build_function_type_list (V2DF_type_node,
15505 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15506 tree void_ftype_pdouble_v2df
15507 = build_function_type_list (void_type_node,
15508 pdouble_type_node, V2DF_type_node, NULL_TREE);
15509 tree void_ftype_pint_int
15510 = build_function_type_list (void_type_node,
15511 pint_type_node, integer_type_node, NULL_TREE);
15512 tree void_ftype_v16qi_v16qi_pchar
15513 = build_function_type_list (void_type_node,
15514 V16QI_type_node, V16QI_type_node,
15515 pchar_type_node, NULL_TREE);
15516 tree v2df_ftype_pcdouble
15517 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15518 tree v2df_ftype_v2df_v2df
15519 = build_function_type_list (V2DF_type_node,
15520 V2DF_type_node, V2DF_type_node, NULL_TREE);
15521 tree v16qi_ftype_v16qi_v16qi
15522 = build_function_type_list (V16QI_type_node,
15523 V16QI_type_node, V16QI_type_node, NULL_TREE);
15524 tree v8hi_ftype_v8hi_v8hi
15525 = build_function_type_list (V8HI_type_node,
15526 V8HI_type_node, V8HI_type_node, NULL_TREE);
15527 tree v4si_ftype_v4si_v4si
15528 = build_function_type_list (V4SI_type_node,
15529 V4SI_type_node, V4SI_type_node, NULL_TREE);
15530 tree v2di_ftype_v2di_v2di
15531 = build_function_type_list (V2DI_type_node,
15532 V2DI_type_node, V2DI_type_node, NULL_TREE);
15533 tree v2di_ftype_v2df_v2df
15534 = build_function_type_list (V2DI_type_node,
15535 V2DF_type_node, V2DF_type_node, NULL_TREE);
15536 tree v2df_ftype_v2df
15537 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15538 tree v2di_ftype_v2di_int
15539 = build_function_type_list (V2DI_type_node,
15540 V2DI_type_node, integer_type_node, NULL_TREE);
15541 tree v2di_ftype_v2di_v2di_int
15542 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15543 V2DI_type_node, integer_type_node, NULL_TREE);
15544 tree v4si_ftype_v4si_int
15545 = build_function_type_list (V4SI_type_node,
15546 V4SI_type_node, integer_type_node, NULL_TREE);
15547 tree v8hi_ftype_v8hi_int
15548 = build_function_type_list (V8HI_type_node,
15549 V8HI_type_node, integer_type_node, NULL_TREE);
15550 tree v8hi_ftype_v8hi_v2di
15551 = build_function_type_list (V8HI_type_node,
15552 V8HI_type_node, V2DI_type_node, NULL_TREE);
15553 tree v4si_ftype_v4si_v2di
15554 = build_function_type_list (V4SI_type_node,
15555 V4SI_type_node, V2DI_type_node, NULL_TREE);
15556 tree v4si_ftype_v8hi_v8hi
15557 = build_function_type_list (V4SI_type_node,
15558 V8HI_type_node, V8HI_type_node, NULL_TREE);
15559 tree di_ftype_v8qi_v8qi
15560 = build_function_type_list (long_long_unsigned_type_node,
15561 V8QI_type_node, V8QI_type_node, NULL_TREE);
15562 tree di_ftype_v2si_v2si
15563 = build_function_type_list (long_long_unsigned_type_node,
15564 V2SI_type_node, V2SI_type_node, NULL_TREE);
15565 tree v2di_ftype_v16qi_v16qi
15566 = build_function_type_list (V2DI_type_node,
15567 V16QI_type_node, V16QI_type_node, NULL_TREE);
15568 tree v2di_ftype_v4si_v4si
15569 = build_function_type_list (V2DI_type_node,
15570 V4SI_type_node, V4SI_type_node, NULL_TREE);
15571 tree int_ftype_v16qi
15572 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15573 tree v16qi_ftype_pcchar
15574 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15575 tree void_ftype_pchar_v16qi
15576 = build_function_type_list (void_type_node,
15577 pchar_type_node, V16QI_type_node, NULL_TREE);
15578
15579 tree float80_type;
15580 tree float128_type;
15581 tree ftype;
15582
15583 /* The __float80 type. */
15584 if (TYPE_MODE (long_double_type_node) == XFmode)
15585 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15586 "__float80");
15587 else
15588 {
15589 /* long double is not the 80-bit type here; build __float80 separately.  */
15590 float80_type = make_node (REAL_TYPE);
15591 TYPE_PRECISION (float80_type) = 80;
15592 layout_type (float80_type);
15593 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15594 }
15595
15596 if (TARGET_64BIT)
15597 {
15598 float128_type = make_node (REAL_TYPE);
15599 TYPE_PRECISION (float128_type) = 128;
15600 layout_type (float128_type);
15601 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15602 }
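/* Editor's note: the register_builtin_type calls above are what make user
   declarations such as "__float80 x;" (and, on 64-bit targets,
   "__float128 y;") acceptable to the front ends.  */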
15603
15604 /* Add all builtins that are more or less simple operations on two
15605 operands. */
15606 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15607 {
15608 /* Use the mode of an input operand; for the mask-generating compares
15609 the output operand can have a different (integer vector) mode.  */
15610 enum machine_mode mode;
15611 tree type;
15612
15613 if (d->name == 0)
15614 continue;
15615 mode = insn_data[d->icode].operand[1].mode;
15616
15617 switch (mode)
15618 {
15619 case V16QImode:
15620 type = v16qi_ftype_v16qi_v16qi;
15621 break;
15622 case V8HImode:
15623 type = v8hi_ftype_v8hi_v8hi;
15624 break;
15625 case V4SImode:
15626 type = v4si_ftype_v4si_v4si;
15627 break;
15628 case V2DImode:
15629 type = v2di_ftype_v2di_v2di;
15630 break;
15631 case V2DFmode:
15632 type = v2df_ftype_v2df_v2df;
15633 break;
15634 case V4SFmode:
15635 type = v4sf_ftype_v4sf_v4sf;
15636 break;
15637 case V8QImode:
15638 type = v8qi_ftype_v8qi_v8qi;
15639 break;
15640 case V4HImode:
15641 type = v4hi_ftype_v4hi_v4hi;
15642 break;
15643 case V2SImode:
15644 type = v2si_ftype_v2si_v2si;
15645 break;
15646 case DImode:
15647 type = di_ftype_di_di;
15648 break;
15649
15650 default:
15651 gcc_unreachable ();
15652 }
15653
15654 /* Override for comparisons. */
15655 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15656 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15657 type = v4si_ftype_v4sf_v4sf;
15658
15659 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15660 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15661 type = v2di_ftype_v2df_v2df;
15662
15663 def_builtin (d->mask, d->name, type, d->code);
15664 }
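/* Editor's note: as an example of the mode-driven dispatch above,
   CODE_FOR_addv4sf3 has V4SFmode inputs, so __builtin_ia32_addps is
   registered with type v4sf_ftype_v4sf_v4sf, while the SSE/SSE2 compare
   patterns fall under the v4si/v2di overrides because they produce an
   integer mask rather than a float vector.  */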
15665
15666 /* Add all builtins that are more or less simple operations on one operand. */
15667 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15668 {
15669 enum machine_mode mode;
15670 tree type;
15671
15672 if (d->name == 0)
15673 continue;
15674 mode = insn_data[d->icode].operand[1].mode;
15675
15676 switch (mode)
15677 {
15678 case V16QImode:
15679 type = v16qi_ftype_v16qi;
15680 break;
15681 case V8HImode:
15682 type = v8hi_ftype_v8hi;
15683 break;
15684 case V4SImode:
15685 type = v4si_ftype_v4si;
15686 break;
15687 case V2DFmode:
15688 type = v2df_ftype_v2df;
15689 break;
15690 case V4SFmode:
15691 type = v4sf_ftype_v4sf;
15692 break;
15693 case V8QImode:
15694 type = v8qi_ftype_v8qi;
15695 break;
15696 case V4HImode:
15697 type = v4hi_ftype_v4hi;
15698 break;
15699 case V2SImode:
15700 type = v2si_ftype_v2si;
15701 break;
15702
15703 default:
15704 gcc_unreachable ();
15705 }
15706
15707 def_builtin (d->mask, d->name, type, d->code);
15708 }
15709
15710 /* Add the remaining MMX insns with somewhat more complicated types. */
15711 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15712 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15713 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15714 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15715
15716 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15717 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15718 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15719
15720 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15721 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15722
15723 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15724 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15725
15726 /* comi/ucomi insns. */
15727 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15728 if (d->mask == MASK_SSE2)
15729 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15730 else
15731 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15732
15733 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15734 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15735 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15736
15737 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15738 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15739 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15740 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15741 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15742 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15743 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15744 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15745 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15746 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15747 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15748
15749 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15750
15751 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15752 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15753
15754 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15755 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15756 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15757 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15758
15759 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15760 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15761 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15762 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15763
15764 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15765
15766 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15767
15768 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15769 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15770 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15771 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15772 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15773 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15774
15775 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15776
15777 /* Original 3DNow! */
15778 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15779 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15780 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15781 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15782 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15783 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15784 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15785 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15786 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15787 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15788 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15789 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15790 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15791 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15792 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15793 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15794 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15795 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15796 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15797 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15798
15799 /* 3DNow! extension as used in the Athlon CPU. */
15800 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15801 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15802 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15803 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15804 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15805 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15806
15807 /* SSE2 */
15808 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15809
15810 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15811 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15812
15813 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15814 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15815
15816 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15817 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15818 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15819 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15820 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15821
15822 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15823 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15824 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15825 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15826
15827 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15828 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15829
15830 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15831
15832 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15833 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15834
15835 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15836 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15837 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15838 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15839 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15840
15841 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15842
15843 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15844 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15845 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15846 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15847
15848 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15849 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15850 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15851
15852 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15853 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15854 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15855 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15856
15857 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15858 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15859 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15860
15861 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15862 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15863
15864 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15865 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15866
15867 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15868 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15869 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15870
15871 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15872 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15873 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15874
15875 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15876 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15877
15878 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15879 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15880 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15881 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15882
15883 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15884 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15885 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15886 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15887
15888 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15889 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15890
15891 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15892
15893 /* Prescott New Instructions. */
15894 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15895 void_ftype_pcvoid_unsigned_unsigned,
15896 IX86_BUILTIN_MONITOR);
15897 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15898 void_ftype_unsigned_unsigned,
15899 IX86_BUILTIN_MWAIT);
15900 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15901 v4sf_ftype_v4sf,
15902 IX86_BUILTIN_MOVSHDUP);
15903 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15904 v4sf_ftype_v4sf,
15905 IX86_BUILTIN_MOVSLDUP);
15906 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15907 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15908
15909 /* SSSE3. */
15910 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15911 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15912 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15913 IX86_BUILTIN_PALIGNR);
15914
15915 /* Access to the vec_init patterns. */
15916 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15917 integer_type_node, NULL_TREE);
15918 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15919 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15920
15921 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15922 short_integer_type_node,
15923 short_integer_type_node,
15924 short_integer_type_node, NULL_TREE);
15925 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15926 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15927
15928 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15929 char_type_node, char_type_node,
15930 char_type_node, char_type_node,
15931 char_type_node, char_type_node,
15932 char_type_node, NULL_TREE);
15933 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15934 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15935
15936 /* Access to the vec_extract patterns. */
15937 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15938 integer_type_node, NULL_TREE);
15939 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15940 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15941
15942 ftype = build_function_type_list (long_long_integer_type_node,
15943 V2DI_type_node, integer_type_node,
15944 NULL_TREE);
15945 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15946 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15947
15948 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15949 integer_type_node, NULL_TREE);
15950 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15951 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15952
15953 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15954 integer_type_node, NULL_TREE);
15955 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15956 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15957
15958 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15959 integer_type_node, NULL_TREE);
15960 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15961 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15962
15963 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15964 integer_type_node, NULL_TREE);
15965 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15966 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15967
15968 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15969 integer_type_node, NULL_TREE);
15970 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15971 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15972
15973 /* Access to the vec_set patterns. */
15974 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15975 intHI_type_node,
15976 integer_type_node, NULL_TREE);
15977 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15978 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15979
15980 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15981 intHI_type_node,
15982 integer_type_node, NULL_TREE);
15983 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15984 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15985 }
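/* Editorial illustration (not part of the original source): the builtins
   registered above are normally reached through the intrinsic wrappers in
   mmintrin.h/xmmintrin.h rather than called directly.  A minimal sketch,
   assuming the usual GCC header mapping of _mm_empty onto
   __builtin_ia32_emms, compiled with -mmmx:  */
#if 0
#include <mmintrin.h>

void
example_mmx_epilogue (void)
{
  /* Emits the emms instruction via IX86_BUILTIN_EMMS, clearing MMX state
     before returning to x87 code.  */
  _mm_empty ();
}
#endif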
15986
15987 /* Errors in the source file can cause expand_expr to return const0_rtx
15988 where we expect a vector. To avoid crashing, use one of the vector
15989 clear instructions. */
15990 static rtx
15991 safe_vector_operand (rtx x, enum machine_mode mode)
15992 {
15993 if (x == const0_rtx)
15994 x = CONST0_RTX (mode);
15995 return x;
15996 }
15997
15998 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15999
16000 static rtx
16001 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16002 {
16003 rtx pat, xops[3];
16004 tree arg0 = TREE_VALUE (arglist);
16005 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16006 rtx op0 = expand_normal (arg0);
16007 rtx op1 = expand_normal (arg1);
16008 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16009 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16010 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16011
16012 if (VECTOR_MODE_P (mode0))
16013 op0 = safe_vector_operand (op0, mode0);
16014 if (VECTOR_MODE_P (mode1))
16015 op1 = safe_vector_operand (op1, mode1);
16016
16017 if (optimize || !target
16018 || GET_MODE (target) != tmode
16019 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16020 target = gen_reg_rtx (tmode);
16021
16022 if (GET_MODE (op1) == SImode && mode1 == TImode)
16023 {
16024 rtx x = gen_reg_rtx (V4SImode);
16025 emit_insn (gen_sse2_loadd (x, op1));
16026 op1 = gen_lowpart (TImode, x);
16027 }
16028
16029 /* The insn must want input operands in the same modes as the
16030 result. */
16031 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16032 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16033
16034 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16035 op0 = copy_to_mode_reg (mode0, op0);
16036 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16037 op1 = copy_to_mode_reg (mode1, op1);
16038
16039 /* ??? Using ix86_fixup_binary_operands is problematic when
16040 we've got mismatched modes. Fake it. */
16041
16042 xops[0] = target;
16043 xops[1] = op0;
16044 xops[2] = op1;
16045
16046 if (tmode == mode0 && tmode == mode1)
16047 {
16048 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16049 op0 = xops[1];
16050 op1 = xops[2];
16051 }
16052 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16053 {
16054 op0 = force_reg (mode0, op0);
16055 op1 = force_reg (mode1, op1);
16056 target = gen_reg_rtx (tmode);
16057 }
16058
16059 pat = GEN_FCN (icode) (target, op0, op1);
16060 if (! pat)
16061 return 0;
16062 emit_insn (pat);
16063 return target;
16064 }
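/* Editorial illustration (not part of the original source): a minimal sketch
   of how ix86_expand_binop_builtin is reached from user code.  The intrinsic
   and header mapping (_mm_add_ps in xmmintrin.h forwarding to
   __builtin_ia32_addps) are the usual GCC ones, assumed here; compile with
   -msse.  */
#if 0
#include <xmmintrin.h>

__m128
example_addps (__m128 a, __m128 b)
{
  /* __builtin_ia32_addps is a bdesc_2arg entry, so ix86_expand_builtin
     falls through its switch and expands it here as a two-operand insn.  */
  return _mm_add_ps (a, b);
}
#endif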
16065
16066 /* Subroutine of ix86_expand_builtin to take care of stores. */
16067
16068 static rtx
16069 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16070 {
16071 rtx pat;
16072 tree arg0 = TREE_VALUE (arglist);
16073 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16074 rtx op0 = expand_normal (arg0);
16075 rtx op1 = expand_normal (arg1);
16076 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16077 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16078
16079 if (VECTOR_MODE_P (mode1))
16080 op1 = safe_vector_operand (op1, mode1);
16081
16082 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16083 op1 = copy_to_mode_reg (mode1, op1);
16084
16085 pat = GEN_FCN (icode) (op0, op1);
16086 if (pat)
16087 emit_insn (pat);
16088 return 0;
16089 }
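/* Editorial illustration (not part of the original source): a sketch of a
   builtin routed through ix86_expand_store_builtin, assuming the usual
   xmmintrin.h mapping of _mm_storeu_ps onto __builtin_ia32_storeups
   (IX86_BUILTIN_STOREUPS); compile with -msse.  */
#if 0
#include <xmmintrin.h>

void
example_storeups (float *p, __m128 v)
{
  /* The address operand becomes a MEM in the insn's memory mode; the value
     operand is forced into a register, as done above.  */
  _mm_storeu_ps (p, v);
}
#endif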
16090
16091 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16092
16093 static rtx
16094 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16095 rtx target, int do_load)
16096 {
16097 rtx pat;
16098 tree arg0 = TREE_VALUE (arglist);
16099 rtx op0 = expand_normal (arg0);
16100 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16101 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16102
16103 if (optimize || !target
16104 || GET_MODE (target) != tmode
16105 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16106 target = gen_reg_rtx (tmode);
16107 if (do_load)
16108 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16109 else
16110 {
16111 if (VECTOR_MODE_P (mode0))
16112 op0 = safe_vector_operand (op0, mode0);
16113
16114 if ((optimize && !register_operand (op0, mode0))
16115 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16116 op0 = copy_to_mode_reg (mode0, op0);
16117 }
16118
16119 pat = GEN_FCN (icode) (target, op0);
16120 if (! pat)
16121 return 0;
16122 emit_insn (pat);
16123 return target;
16124 }
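/* Editorial illustration (not part of the original source): the DO_LOAD
   distinction above, sketched with the usual xmmintrin.h mappings (assumed):
   _mm_loadu_ps uses __builtin_ia32_loadups (do_load == 1), whose single
   operand is wrapped in a MEM, while _mm_sqrt_ps uses __builtin_ia32_sqrtps
   (do_load == 0), whose operand stays a register.  */
#if 0
#include <xmmintrin.h>

__m128
example_unops (const float *p)
{
  __m128 v = _mm_loadu_ps (p);   /* expanded with do_load = 1 */
  return _mm_sqrt_ps (v);        /* expanded with do_load = 0 */
}
#endif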
16125
16126 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16127 sqrtss, rsqrtss, rcpss. */
16128
16129 static rtx
16130 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16131 {
16132 rtx pat;
16133 tree arg0 = TREE_VALUE (arglist);
16134 rtx op1, op0 = expand_normal (arg0);
16135 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16136 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16137
16138 if (optimize || !target
16139 || GET_MODE (target) != tmode
16140 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16141 target = gen_reg_rtx (tmode);
16142
16143 if (VECTOR_MODE_P (mode0))
16144 op0 = safe_vector_operand (op0, mode0);
16145
16146 if ((optimize && !register_operand (op0, mode0))
16147 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16148 op0 = copy_to_mode_reg (mode0, op0);
16149
16150 op1 = op0;
16151 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16152 op1 = copy_to_mode_reg (mode0, op1);
16153
16154 pat = GEN_FCN (icode) (target, op0, op1);
16155 if (! pat)
16156 return 0;
16157 emit_insn (pat);
16158 return target;
16159 }
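/* Editorial illustration (not part of the original source): the scalar insns
   handled above take the source twice, once for the computed low element and
   once to supply the untouched upper elements.  A sketch, assuming the usual
   xmmintrin.h mapping of _mm_sqrt_ss onto __builtin_ia32_sqrtss; compile
   with -msse.  */
#if 0
#include <xmmintrin.h>

__m128
example_sqrtss (__m128 v)
{
  /* Only element 0 is replaced by its square root; elements 1-3 are copied
     from the duplicated input operand.  */
  return _mm_sqrt_ss (v);
}
#endif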
16160
16161 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16162
16163 static rtx
16164 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16165 rtx target)
16166 {
16167 rtx pat;
16168 tree arg0 = TREE_VALUE (arglist);
16169 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16170 rtx op0 = expand_normal (arg0);
16171 rtx op1 = expand_normal (arg1);
16172 rtx op2;
16173 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16174 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16175 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16176 enum rtx_code comparison = d->comparison;
16177
16178 if (VECTOR_MODE_P (mode0))
16179 op0 = safe_vector_operand (op0, mode0);
16180 if (VECTOR_MODE_P (mode1))
16181 op1 = safe_vector_operand (op1, mode1);
16182
16183 /* Swap operands if we have a comparison that isn't available in
16184 hardware. */
16185 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16186 {
16187 rtx tmp = gen_reg_rtx (mode1);
16188 emit_move_insn (tmp, op1);
16189 op1 = op0;
16190 op0 = tmp;
16191 }
16192
16193 if (optimize || !target
16194 || GET_MODE (target) != tmode
16195 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16196 target = gen_reg_rtx (tmode);
16197
16198 if ((optimize && !register_operand (op0, mode0))
16199 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16200 op0 = copy_to_mode_reg (mode0, op0);
16201 if ((optimize && !register_operand (op1, mode1))
16202 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16203 op1 = copy_to_mode_reg (mode1, op1);
16204
16205 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16206 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16207 if (! pat)
16208 return 0;
16209 emit_insn (pat);
16210 return target;
16211 }
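/* Editorial illustration (not part of the original source): a sketch of a
   masked compare expanded by ix86_expand_sse_compare, assuming the usual
   xmmintrin.h mapping of _mm_cmpeq_ps onto __builtin_ia32_cmpeqps.
   Builtins flagged BUILTIN_DESC_SWAP_OPERANDS take the operand-swapping path
   above so that only predicates the hardware provides are emitted.  */
#if 0
#include <xmmintrin.h>

__m128
example_cmpeqps (__m128 a, __m128 b)
{
  /* Each result element is all-ones where a == b, and zero elsewhere.  */
  return _mm_cmpeq_ps (a, b);
}
#endif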
16212
16213 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16214
16215 static rtx
16216 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16217 rtx target)
16218 {
16219 rtx pat;
16220 tree arg0 = TREE_VALUE (arglist);
16221 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16222 rtx op0 = expand_normal (arg0);
16223 rtx op1 = expand_normal (arg1);
16224 rtx op2;
16225 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16226 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16227 enum rtx_code comparison = d->comparison;
16228
16229 if (VECTOR_MODE_P (mode0))
16230 op0 = safe_vector_operand (op0, mode0);
16231 if (VECTOR_MODE_P (mode1))
16232 op1 = safe_vector_operand (op1, mode1);
16233
16234 /* Swap operands if we have a comparison that isn't available in
16235 hardware. */
16236 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16237 {
16238 rtx tmp = op1;
16239 op1 = op0;
16240 op0 = tmp;
16241 }
16242
16243 target = gen_reg_rtx (SImode);
16244 emit_move_insn (target, const0_rtx);
16245 target = gen_rtx_SUBREG (QImode, target, 0);
16246
16247 if ((optimize && !register_operand (op0, mode0))
16248 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16249 op0 = copy_to_mode_reg (mode0, op0);
16250 if ((optimize && !register_operand (op1, mode1))
16251 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16252 op1 = copy_to_mode_reg (mode1, op1);
16253
16254 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16255 pat = GEN_FCN (d->icode) (op0, op1);
16256 if (! pat)
16257 return 0;
16258 emit_insn (pat);
16259 emit_insn (gen_rtx_SET (VOIDmode,
16260 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16261 gen_rtx_fmt_ee (comparison, QImode,
16262 SET_DEST (pat),
16263 const0_rtx)));
16264
16265 return SUBREG_REG (target);
16266 }
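/* Editorial illustration (not part of the original source): a sketch of a
   comi/ucomi builtin expanded by ix86_expand_sse_comi, assuming the usual
   xmmintrin.h mapping of _mm_comieq_ss onto __builtin_ia32_comieq.  The
   expander emits the comparison and then materializes the flags result into
   the low byte of an integer register, as above.  */
#if 0
#include <xmmintrin.h>

int
example_comieq (__m128 a, __m128 b)
{
  /* Returns 1 if the low elements compare equal, 0 otherwise.  */
  return _mm_comieq_ss (a, b);
}
#endif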
16267
16268 /* Return the integer constant in ARG. Constrain it to be in the range
16269 of the subparts of VEC_TYPE; issue an error if not. */
16270
16271 static int
16272 get_element_number (tree vec_type, tree arg)
16273 {
16274 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16275
16276 if (!host_integerp (arg, 1)
16277 || (elt = tree_low_cst (arg, 1), elt > max))
16278 {
16279 error ("selector must be an integer constant in the range 0..%wi", max);
16280 return 0;
16281 }
16282
16283 return elt;
16284 }
16285
16286 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16287 ix86_expand_vector_init. We DO have language-level syntax for this, in
16288 the form of (type){ init-list }. Except that since we can't place emms
16289 instructions from inside the compiler, we can't allow the use of MMX
16290 registers unless the user explicitly asks for it. So we do *not* define
16291 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16292 we have builtins invoked by mmintrin.h that give us license to emit
16293 these sorts of instructions. */
16294
16295 static rtx
16296 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16297 {
16298 enum machine_mode tmode = TYPE_MODE (type);
16299 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16300 int i, n_elt = GET_MODE_NUNITS (tmode);
16301 rtvec v = rtvec_alloc (n_elt);
16302
16303 gcc_assert (VECTOR_MODE_P (tmode));
16304
16305 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16306 {
16307 rtx x = expand_normal (TREE_VALUE (arglist));
16308 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16309 }
16310
16311 gcc_assert (arglist == NULL);
16312
16313 if (!target || !register_operand (target, tmode))
16314 target = gen_reg_rtx (tmode);
16315
16316 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16317 return target;
16318 }
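/* Editorial illustration (not part of the original source): a sketch of a
   vec_init builtin.  GCC's mmintrin.h (assumed here) builds _mm_set_pi32 and
   friends on __builtin_ia32_vec_init_v2si, so MMX registers are only used
   when the user includes the header and asks for them; compile with -mmmx.  */
#if 0
#include <mmintrin.h>

__m64
example_vec_init (int lo, int hi)
{
  /* Expanded through ix86_expand_vec_init_builtin / ix86_expand_vector_init
     rather than a (type){...} constructor.  */
  return _mm_setr_pi32 (lo, hi);
}
#endif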
16319
16320 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16321 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16322 had a language-level syntax for referencing vector elements. */
16323
16324 static rtx
16325 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16326 {
16327 enum machine_mode tmode, mode0;
16328 tree arg0, arg1;
16329 int elt;
16330 rtx op0;
16331
16332 arg0 = TREE_VALUE (arglist);
16333 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16334
16335 op0 = expand_normal (arg0);
16336 elt = get_element_number (TREE_TYPE (arg0), arg1);
16337
16338 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16339 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16340 gcc_assert (VECTOR_MODE_P (mode0));
16341
16342 op0 = force_reg (mode0, op0);
16343
16344 if (optimize || !target || !register_operand (target, tmode))
16345 target = gen_reg_rtx (tmode);
16346
16347 ix86_expand_vector_extract (true, target, op0, elt);
16348
16349 return target;
16350 }
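/* Editorial illustration (not part of the original source): a sketch of a
   vec_extract builtin, assuming the usual xmmintrin.h mapping of
   _mm_extract_pi16 onto __builtin_ia32_vec_ext_v4hi (available with -msse or
   3DNow!A).  The selector must be a constant; otherwise get_element_number
   reports an error.  */
#if 0
#include <xmmintrin.h>

int
example_extract (__m64 v)
{
  /* Extracts halfword 2; the index is range-checked at compile time.  */
  return _mm_extract_pi16 (v, 2);
}
#endif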
16351
16352 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16353 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16354 a language-level syntax for referencing vector elements. */
16355
16356 static rtx
16357 ix86_expand_vec_set_builtin (tree arglist)
16358 {
16359 enum machine_mode tmode, mode1;
16360 tree arg0, arg1, arg2;
16361 int elt;
16362 rtx op0, op1;
16363
16364 arg0 = TREE_VALUE (arglist);
16365 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16366 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16367
16368 tmode = TYPE_MODE (TREE_TYPE (arg0));
16369 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16370 gcc_assert (VECTOR_MODE_P (tmode));
16371
16372 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16373 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16374 elt = get_element_number (TREE_TYPE (arg0), arg2);
16375
16376 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16377 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16378
16379 op0 = force_reg (tmode, op0);
16380 op1 = force_reg (mode1, op1);
16381
16382 ix86_expand_vector_set (true, op0, op1, elt);
16383
16384 return op0;
16385 }
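/* Editorial illustration (not part of the original source): a sketch of a
   vec_set builtin, assuming the usual xmmintrin.h mapping of _mm_insert_pi16
   onto __builtin_ia32_vec_set_v4hi.  As with the extract case, the element
   index must be a compile-time constant.  */
#if 0
#include <xmmintrin.h>

__m64
example_insert (__m64 v, int x)
{
  /* Replaces halfword 0 of V with X via ix86_expand_vector_set.  */
  return _mm_insert_pi16 (v, x, 0);
}
#endif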
16386
16387 /* Expand an expression EXP that calls a built-in function,
16388 with result going to TARGET if that's convenient
16389 (and in mode MODE if that's convenient).
16390 SUBTARGET may be used as the target for computing one of EXP's operands.
16391 IGNORE is nonzero if the value is to be ignored. */
16392
16393 static rtx
16394 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16395 enum machine_mode mode ATTRIBUTE_UNUSED,
16396 int ignore ATTRIBUTE_UNUSED)
16397 {
16398 const struct builtin_description *d;
16399 size_t i;
16400 enum insn_code icode;
16401 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16402 tree arglist = TREE_OPERAND (exp, 1);
16403 tree arg0, arg1, arg2;
16404 rtx op0, op1, op2, pat;
16405 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16406 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16407
16408 switch (fcode)
16409 {
16410 case IX86_BUILTIN_EMMS:
16411 emit_insn (gen_mmx_emms ());
16412 return 0;
16413
16414 case IX86_BUILTIN_SFENCE:
16415 emit_insn (gen_sse_sfence ());
16416 return 0;
16417
16418 case IX86_BUILTIN_MASKMOVQ:
16419 case IX86_BUILTIN_MASKMOVDQU:
16420 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16421 ? CODE_FOR_mmx_maskmovq
16422 : CODE_FOR_sse2_maskmovdqu);
16423 /* Note the arg order is different from the operand order. */
16424 arg1 = TREE_VALUE (arglist);
16425 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16426 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16427 op0 = expand_normal (arg0);
16428 op1 = expand_normal (arg1);
16429 op2 = expand_normal (arg2);
16430 mode0 = insn_data[icode].operand[0].mode;
16431 mode1 = insn_data[icode].operand[1].mode;
16432 mode2 = insn_data[icode].operand[2].mode;
16433
16434 op0 = force_reg (Pmode, op0);
16435 op0 = gen_rtx_MEM (mode1, op0);
16436
16437 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16438 op0 = copy_to_mode_reg (mode0, op0);
16439 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16440 op1 = copy_to_mode_reg (mode1, op1);
16441 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16442 op2 = copy_to_mode_reg (mode2, op2);
16443 pat = GEN_FCN (icode) (op0, op1, op2);
16444 if (! pat)
16445 return 0;
16446 emit_insn (pat);
16447 return 0;
16448
16449 case IX86_BUILTIN_SQRTSS:
16450 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16451 case IX86_BUILTIN_RSQRTSS:
16452 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16453 case IX86_BUILTIN_RCPSS:
16454 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16455
16456 case IX86_BUILTIN_LOADUPS:
16457 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16458
16459 case IX86_BUILTIN_STOREUPS:
16460 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16461
16462 case IX86_BUILTIN_LOADHPS:
16463 case IX86_BUILTIN_LOADLPS:
16464 case IX86_BUILTIN_LOADHPD:
16465 case IX86_BUILTIN_LOADLPD:
16466 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16467 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16468 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16469 : CODE_FOR_sse2_loadlpd);
16470 arg0 = TREE_VALUE (arglist);
16471 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16472 op0 = expand_normal (arg0);
16473 op1 = expand_normal (arg1);
16474 tmode = insn_data[icode].operand[0].mode;
16475 mode0 = insn_data[icode].operand[1].mode;
16476 mode1 = insn_data[icode].operand[2].mode;
16477
16478 op0 = force_reg (mode0, op0);
16479 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16480 if (optimize || target == 0
16481 || GET_MODE (target) != tmode
16482 || !register_operand (target, tmode))
16483 target = gen_reg_rtx (tmode);
16484 pat = GEN_FCN (icode) (target, op0, op1);
16485 if (! pat)
16486 return 0;
16487 emit_insn (pat);
16488 return target;
16489
16490 case IX86_BUILTIN_STOREHPS:
16491 case IX86_BUILTIN_STORELPS:
16492 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16493 : CODE_FOR_sse_storelps);
16494 arg0 = TREE_VALUE (arglist);
16495 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16496 op0 = expand_normal (arg0);
16497 op1 = expand_normal (arg1);
16498 mode0 = insn_data[icode].operand[0].mode;
16499 mode1 = insn_data[icode].operand[1].mode;
16500
16501 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16502 op1 = force_reg (mode1, op1);
16503
16504 pat = GEN_FCN (icode) (op0, op1);
16505 if (! pat)
16506 return 0;
16507 emit_insn (pat);
16508 return const0_rtx;
16509
16510 case IX86_BUILTIN_MOVNTPS:
16511 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16512 case IX86_BUILTIN_MOVNTQ:
16513 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16514
16515 case IX86_BUILTIN_LDMXCSR:
16516 op0 = expand_normal (TREE_VALUE (arglist));
16517 target = assign_386_stack_local (SImode, SLOT_TEMP);
16518 emit_move_insn (target, op0);
16519 emit_insn (gen_sse_ldmxcsr (target));
16520 return 0;
16521
16522 case IX86_BUILTIN_STMXCSR:
16523 target = assign_386_stack_local (SImode, SLOT_TEMP);
16524 emit_insn (gen_sse_stmxcsr (target));
16525 return copy_to_mode_reg (SImode, target);
16526
16527 case IX86_BUILTIN_SHUFPS:
16528 case IX86_BUILTIN_SHUFPD:
16529 icode = (fcode == IX86_BUILTIN_SHUFPS
16530 ? CODE_FOR_sse_shufps
16531 : CODE_FOR_sse2_shufpd);
16532 arg0 = TREE_VALUE (arglist);
16533 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16534 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16535 op0 = expand_normal (arg0);
16536 op1 = expand_normal (arg1);
16537 op2 = expand_normal (arg2);
16538 tmode = insn_data[icode].operand[0].mode;
16539 mode0 = insn_data[icode].operand[1].mode;
16540 mode1 = insn_data[icode].operand[2].mode;
16541 mode2 = insn_data[icode].operand[3].mode;
16542
16543 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16544 op0 = copy_to_mode_reg (mode0, op0);
16545 if ((optimize && !register_operand (op1, mode1))
16546 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16547 op1 = copy_to_mode_reg (mode1, op1);
16548 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16549 {
16550 /* @@@ better error message */
16551 error ("mask must be an immediate");
16552 return gen_reg_rtx (tmode);
16553 }
16554 if (optimize || target == 0
16555 || GET_MODE (target) != tmode
16556 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16557 target = gen_reg_rtx (tmode);
16558 pat = GEN_FCN (icode) (target, op0, op1, op2);
16559 if (! pat)
16560 return 0;
16561 emit_insn (pat);
16562 return target;
16563
16564 case IX86_BUILTIN_PSHUFW:
16565 case IX86_BUILTIN_PSHUFD:
16566 case IX86_BUILTIN_PSHUFHW:
16567 case IX86_BUILTIN_PSHUFLW:
16568 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16569 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16570 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16571 : CODE_FOR_mmx_pshufw);
16572 arg0 = TREE_VALUE (arglist);
16573 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16574 op0 = expand_normal (arg0);
16575 op1 = expand_normal (arg1);
16576 tmode = insn_data[icode].operand[0].mode;
16577 mode1 = insn_data[icode].operand[1].mode;
16578 mode2 = insn_data[icode].operand[2].mode;
16579
16580 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16581 op0 = copy_to_mode_reg (mode1, op0);
16582 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16583 {
16584 /* @@@ better error message */
16585 error ("mask must be an immediate");
16586 return const0_rtx;
16587 }
16588 if (target == 0
16589 || GET_MODE (target) != tmode
16590 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16591 target = gen_reg_rtx (tmode);
16592 pat = GEN_FCN (icode) (target, op0, op1);
16593 if (! pat)
16594 return 0;
16595 emit_insn (pat);
16596 return target;
16597
16598 case IX86_BUILTIN_PSLLDQI128:
16599 case IX86_BUILTIN_PSRLDQI128:
16600 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16601 : CODE_FOR_sse2_lshrti3);
16602 arg0 = TREE_VALUE (arglist);
16603 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16604 op0 = expand_normal (arg0);
16605 op1 = expand_normal (arg1);
16606 tmode = insn_data[icode].operand[0].mode;
16607 mode1 = insn_data[icode].operand[1].mode;
16608 mode2 = insn_data[icode].operand[2].mode;
16609
16610 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16611 {
16612 op0 = copy_to_reg (op0);
16613 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16614 }
16615 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16616 {
16617 error ("shift must be an immediate");
16618 return const0_rtx;
16619 }
16620 target = gen_reg_rtx (V2DImode);
16621 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16622 if (! pat)
16623 return 0;
16624 emit_insn (pat);
16625 return target;
16626
16627 case IX86_BUILTIN_FEMMS:
16628 emit_insn (gen_mmx_femms ());
16629 return NULL_RTX;
16630
16631 case IX86_BUILTIN_PAVGUSB:
16632 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16633
16634 case IX86_BUILTIN_PF2ID:
16635 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16636
16637 case IX86_BUILTIN_PFACC:
16638 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16639
16640 case IX86_BUILTIN_PFADD:
16641 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16642
16643 case IX86_BUILTIN_PFCMPEQ:
16644 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16645
16646 case IX86_BUILTIN_PFCMPGE:
16647 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16648
16649 case IX86_BUILTIN_PFCMPGT:
16650 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16651
16652 case IX86_BUILTIN_PFMAX:
16653 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16654
16655 case IX86_BUILTIN_PFMIN:
16656 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16657
16658 case IX86_BUILTIN_PFMUL:
16659 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16660
16661 case IX86_BUILTIN_PFRCP:
16662 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16663
16664 case IX86_BUILTIN_PFRCPIT1:
16665 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16666
16667 case IX86_BUILTIN_PFRCPIT2:
16668 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16669
16670 case IX86_BUILTIN_PFRSQIT1:
16671 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16672
16673 case IX86_BUILTIN_PFRSQRT:
16674 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16675
16676 case IX86_BUILTIN_PFSUB:
16677 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16678
16679 case IX86_BUILTIN_PFSUBR:
16680 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16681
16682 case IX86_BUILTIN_PI2FD:
16683 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16684
16685 case IX86_BUILTIN_PMULHRW:
16686 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16687
16688 case IX86_BUILTIN_PF2IW:
16689 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16690
16691 case IX86_BUILTIN_PFNACC:
16692 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16693
16694 case IX86_BUILTIN_PFPNACC:
16695 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16696
16697 case IX86_BUILTIN_PI2FW:
16698 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16699
16700 case IX86_BUILTIN_PSWAPDSI:
16701 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16702
16703 case IX86_BUILTIN_PSWAPDSF:
16704 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16705
16706 case IX86_BUILTIN_SQRTSD:
16707 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16708 case IX86_BUILTIN_LOADUPD:
16709 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16710 case IX86_BUILTIN_STOREUPD:
16711 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16712
16713 case IX86_BUILTIN_MFENCE:
16714 emit_insn (gen_sse2_mfence ());
16715 return 0;
16716 case IX86_BUILTIN_LFENCE:
16717 emit_insn (gen_sse2_lfence ());
16718 return 0;
16719
16720 case IX86_BUILTIN_CLFLUSH:
16721 arg0 = TREE_VALUE (arglist);
16722 op0 = expand_normal (arg0);
16723 icode = CODE_FOR_sse2_clflush;
16724 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16725 op0 = copy_to_mode_reg (Pmode, op0);
16726
16727 emit_insn (gen_sse2_clflush (op0));
16728 return 0;
16729
16730 case IX86_BUILTIN_MOVNTPD:
16731 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16732 case IX86_BUILTIN_MOVNTDQ:
16733 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16734 case IX86_BUILTIN_MOVNTI:
16735 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16736
16737 case IX86_BUILTIN_LOADDQU:
16738 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16739 case IX86_BUILTIN_STOREDQU:
16740 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16741
16742 case IX86_BUILTIN_MONITOR:
16743 arg0 = TREE_VALUE (arglist);
16744 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16745 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16746 op0 = expand_normal (arg0);
16747 op1 = expand_normal (arg1);
16748 op2 = expand_normal (arg2);
16749 if (!REG_P (op0))
16750 op0 = copy_to_mode_reg (Pmode, op0);
16751 if (!REG_P (op1))
16752 op1 = copy_to_mode_reg (SImode, op1);
16753 if (!REG_P (op2))
16754 op2 = copy_to_mode_reg (SImode, op2);
16755 if (!TARGET_64BIT)
16756 emit_insn (gen_sse3_monitor (op0, op1, op2));
16757 else
16758 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16759 return 0;
16760
16761 case IX86_BUILTIN_MWAIT:
16762 arg0 = TREE_VALUE (arglist);
16763 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16764 op0 = expand_normal (arg0);
16765 op1 = expand_normal (arg1);
16766 if (!REG_P (op0))
16767 op0 = copy_to_mode_reg (SImode, op0);
16768 if (!REG_P (op1))
16769 op1 = copy_to_mode_reg (SImode, op1);
16770 emit_insn (gen_sse3_mwait (op0, op1));
16771 return 0;
16772
16773 case IX86_BUILTIN_LDDQU:
16774 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16775 target, 1);
16776
16777 case IX86_BUILTIN_PALIGNR:
16778 case IX86_BUILTIN_PALIGNR128:
16779 if (fcode == IX86_BUILTIN_PALIGNR)
16780 {
16781 icode = CODE_FOR_ssse3_palignrdi;
16782 mode = DImode;
16783 }
16784 else
16785 {
16786 icode = CODE_FOR_ssse3_palignrti;
16787 mode = V2DImode;
16788 }
16789 arg0 = TREE_VALUE (arglist);
16790 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16791 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16792 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16793 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16794 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16795 tmode = insn_data[icode].operand[0].mode;
16796 mode1 = insn_data[icode].operand[1].mode;
16797 mode2 = insn_data[icode].operand[2].mode;
16798 mode3 = insn_data[icode].operand[3].mode;
16799
16800 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16801 {
16802 op0 = copy_to_reg (op0);
16803 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16804 }
16805 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16806 {
16807 op1 = copy_to_reg (op1);
16808 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16809 }
16810 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16811 {
16812 error ("shift must be an immediate");
16813 return const0_rtx;
16814 }
16815 target = gen_reg_rtx (mode);
16816 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16817 op0, op1, op2);
16818 if (! pat)
16819 return 0;
16820 emit_insn (pat);
16821 return target;
16822
16823 case IX86_BUILTIN_VEC_INIT_V2SI:
16824 case IX86_BUILTIN_VEC_INIT_V4HI:
16825 case IX86_BUILTIN_VEC_INIT_V8QI:
16826 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16827
16828 case IX86_BUILTIN_VEC_EXT_V2DF:
16829 case IX86_BUILTIN_VEC_EXT_V2DI:
16830 case IX86_BUILTIN_VEC_EXT_V4SF:
16831 case IX86_BUILTIN_VEC_EXT_V4SI:
16832 case IX86_BUILTIN_VEC_EXT_V8HI:
16833 case IX86_BUILTIN_VEC_EXT_V2SI:
16834 case IX86_BUILTIN_VEC_EXT_V4HI:
16835 return ix86_expand_vec_ext_builtin (arglist, target);
16836
16837 case IX86_BUILTIN_VEC_SET_V8HI:
16838 case IX86_BUILTIN_VEC_SET_V4HI:
16839 return ix86_expand_vec_set_builtin (arglist);
16840
16841 default:
16842 break;
16843 }
16844
16845 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16846 if (d->code == fcode)
16847 {
16848 /* Compares are treated specially. */
16849 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16850 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16851 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16852 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16853 return ix86_expand_sse_compare (d, arglist, target);
16854
16855 return ix86_expand_binop_builtin (d->icode, arglist, target);
16856 }
16857
16858 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16859 if (d->code == fcode)
16860 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16861
16862 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16863 if (d->code == fcode)
16864 return ix86_expand_sse_comi (d, arglist, target);
16865
16866 gcc_unreachable ();
16867 }
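/* Editorial illustration (not part of the original source): how the
   dispatcher above reports non-immediate operands.  Assuming the usual
   xmmintrin.h mapping of _mm_shuffle_ps onto __builtin_ia32_shufps
   (IX86_BUILTIN_SHUFPS), the mask operand must satisfy the insn's immediate
   predicate; a run-time value trips the "mask must be an immediate" error
   path in the SHUFPS case.  */
#if 0
#include <xmmintrin.h>

__m128
example_shufps (__m128 a, __m128 b)
{
  /* OK: the mask is a literal constant.  A variable mask would be rejected
     by ix86_expand_builtin.  */
  return _mm_shuffle_ps (a, b, 0x1B);
}
#endif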
16868
16869 /* Store OPERAND to memory after reload is completed. This means
16870 that we can't easily use assign_stack_local. */
16871 rtx
16872 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16873 {
16874 rtx result;
16875
16876 gcc_assert (reload_completed);
16877 if (TARGET_RED_ZONE)
16878 {
16879 result = gen_rtx_MEM (mode,
16880 gen_rtx_PLUS (Pmode,
16881 stack_pointer_rtx,
16882 GEN_INT (-RED_ZONE_SIZE)));
16883 emit_move_insn (result, operand);
16884 }
16885 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16886 {
16887 switch (mode)
16888 {
16889 case HImode:
16890 case SImode:
16891 operand = gen_lowpart (DImode, operand);
16892 /* FALLTHRU */
16893 case DImode:
16894 emit_insn (
16895 gen_rtx_SET (VOIDmode,
16896 gen_rtx_MEM (DImode,
16897 gen_rtx_PRE_DEC (DImode,
16898 stack_pointer_rtx)),
16899 operand));
16900 break;
16901 default:
16902 gcc_unreachable ();
16903 }
16904 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16905 }
16906 else
16907 {
16908 switch (mode)
16909 {
16910 case DImode:
16911 {
16912 rtx operands[2];
16913 split_di (&operand, 1, operands, operands + 1);
16914 emit_insn (
16915 gen_rtx_SET (VOIDmode,
16916 gen_rtx_MEM (SImode,
16917 gen_rtx_PRE_DEC (Pmode,
16918 stack_pointer_rtx)),
16919 operands[1]));
16920 emit_insn (
16921 gen_rtx_SET (VOIDmode,
16922 gen_rtx_MEM (SImode,
16923 gen_rtx_PRE_DEC (Pmode,
16924 stack_pointer_rtx)),
16925 operands[0]));
16926 }
16927 break;
16928 case HImode:
16929 /* Store HImodes as SImodes. */
16930 operand = gen_lowpart (SImode, operand);
16931 /* FALLTHRU */
16932 case SImode:
16933 emit_insn (
16934 gen_rtx_SET (VOIDmode,
16935 gen_rtx_MEM (GET_MODE (operand),
16936 gen_rtx_PRE_DEC (SImode,
16937 stack_pointer_rtx)),
16938 operand));
16939 break;
16940 default:
16941 gcc_unreachable ();
16942 }
16943 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16944 }
16945 return result;
16946 }
16947
16948 /* Free the operand from memory. */
16949 void
16950 ix86_free_from_memory (enum machine_mode mode)
16951 {
16952 if (!TARGET_RED_ZONE)
16953 {
16954 int size;
16955
16956 if (mode == DImode || TARGET_64BIT)
16957 size = 8;
16958 else
16959 size = 4;
16960 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16961 to a pop or add instruction if registers are available. */
16962 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16963 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16964 GEN_INT (size))));
16965 }
16966 }
16967
16968 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16969 QImode must go into class Q_REGS.
16970 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16971 movdf to do mem-to-mem moves through integer regs. */
16972 enum reg_class
16973 ix86_preferred_reload_class (rtx x, enum reg_class class)
16974 {
16975 enum machine_mode mode = GET_MODE (x);
16976
16977 /* We're only allowed to return a subclass of CLASS. Many of the
16978 following checks fail for NO_REGS, so eliminate that early. */
16979 if (class == NO_REGS)
16980 return NO_REGS;
16981
16982 /* All classes can load zeros. */
16983 if (x == CONST0_RTX (mode))
16984 return class;
16985
16986 /* Force constants into memory if we are loading a (nonzero) constant into
16987 an MMX or SSE register. This is because there are no MMX/SSE instructions
16988 to load from a constant. */
16989 if (CONSTANT_P (x)
16990 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16991 return NO_REGS;
16992
16993 /* Prefer SSE regs only, if we can use them for math. */
16994 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16995 return SSE_CLASS_P (class) ? class : NO_REGS;
16996
16997 /* Floating-point constants need more complex checks. */
16998 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16999 {
17000 /* General regs can load everything. */
17001 if (reg_class_subset_p (class, GENERAL_REGS))
17002 return class;
17003
17004 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17005 zero above. We only want to wind up preferring 80387 registers if
17006 we plan on doing computation with them. */
17007 if (TARGET_80387
17008 && standard_80387_constant_p (x))
17009 {
17010 /* Limit class to non-sse. */
17011 if (class == FLOAT_SSE_REGS)
17012 return FLOAT_REGS;
17013 if (class == FP_TOP_SSE_REGS)
17014 return FP_TOP_REG;
17015 if (class == FP_SECOND_SSE_REGS)
17016 return FP_SECOND_REG;
17017 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17018 return class;
17019 }
17020
17021 return NO_REGS;
17022 }
17023
17024 /* Generally when we see PLUS here, it's the function invariant
17025 (plus soft-fp const_int), which can only be computed into general
17026 regs. */
17027 if (GET_CODE (x) == PLUS)
17028 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17029
17030 /* QImode constants are easy to load, but non-constant QImode data
17031 must go into Q_REGS. */
17032 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17033 {
17034 if (reg_class_subset_p (class, Q_REGS))
17035 return class;
17036 if (reg_class_subset_p (Q_REGS, class))
17037 return Q_REGS;
17038 return NO_REGS;
17039 }
17040
17041 return class;
17042 }
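/* Editorial illustration (not part of the original source): the effect of
   the constant handling above, as a minimal sketch for x87 math.  A constant
   satisfying standard_80387_constant_p may stay in FLOAT_REGS and be
   materialized with fld1; other FP constants are preferred into NO_REGS and
   therefore loaded from the constant pool.  */
#if 0
double
example_standard_constant (double x)
{
  /* 1.0 is a standard 80387 constant, so it can be loaded with fld1.  */
  return x + 1.0;
}

double
example_pool_constant (double x)
{
  /* 1.5 is not a standard constant; it comes from memory instead.  */
  return x + 1.5;
}
#endif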
17043
17044 /* Discourage putting floating-point values in SSE registers unless
17045 SSE math is being used, and likewise for the 387 registers. */
17046 enum reg_class
17047 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17048 {
17049 enum machine_mode mode = GET_MODE (x);
17050
17051 /* Restrict the output reload class to the register bank that we are doing
17052 math on. If we would like not to return a subset of CLASS, reject this
17053 alternative: if reload cannot do this, it will still use its choice. */
17054 mode = GET_MODE (x);
17055 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17056 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17057
17058 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17059 {
17060 if (class == FP_TOP_SSE_REGS)
17061 return FP_TOP_REG;
17062 else if (class == FP_SECOND_SSE_REGS)
17063 return FP_SECOND_REG;
17064 else
17065 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17066 }
17067
17068 return class;
17069 }
17070
17071 /* If we are copying between general and FP registers, we need a memory
17072 location. The same is true for SSE and MMX registers.
17073
17074 The macro can't work reliably when one of the CLASSES is a class containing
17075 registers from multiple units (SSE, MMX, integer). We avoid this by never
17076 combining those units in a single alternative in the machine description.
17077 Ensure that this constraint holds to avoid unexpected surprises.
17078
17079 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17080 enforce these sanity checks. */
17081
17082 int
17083 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17084 enum machine_mode mode, int strict)
17085 {
17086 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17087 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17088 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17089 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17090 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17091 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17092 {
17093 gcc_assert (!strict);
17094 return true;
17095 }
17096
17097 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17098 return true;
17099
17100 /* ??? This is a lie. We do have moves between mmx/general, and between
17101 mmx/sse2. But by saying we need secondary memory we discourage the
17102 register allocator from using the mmx registers unless needed. */
17103 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17104 return true;
17105
17106 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17107 {
17108 /* SSE1 doesn't have any direct moves from other classes. */
17109 if (!TARGET_SSE2)
17110 return true;
17111
17112 /* If the target says that inter-unit moves are more expensive
17113 than moving through memory, then don't generate them. */
17114 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17115 return true;
17116
17117 /* Between SSE and general, we have moves no larger than word size. */
17118 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17119 return true;
17120
17121 /* ??? For the cost of one register reformat penalty, we could use
17122 the same instructions to move SFmode and DFmode data, but the
17123 relevant move patterns don't support those alternatives. */
17124 if (mode == SFmode || mode == DFmode)
17125 return true;
17126 }
17127
17128 return false;
17129 }
17130
17131 /* Return true if the registers in CLASS cannot represent the change from
17132 modes FROM to TO. */
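/* For example, a change from HImode to SImode (as reload would like for
   (subreg:SI (reg:HI) 0)) is rejected for SSE and MMX classes because the
   narrower mode is under 4 bytes, and every mode change is rejected for x87
   classes since values there are kept in extended precision.  */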
17133
17134 bool
17135 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17136 enum reg_class class)
17137 {
17138 if (from == to)
17139 return false;
17140
17141 /* x87 registers can't do subreg at all, as all values are reformatted
17142 to extended precision. */
17143 if (MAYBE_FLOAT_CLASS_P (class))
17144 return true;
17145
17146 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17147 {
17148 /* Vector registers do not support QI or HImode loads. If we don't
17149 disallow a change to these modes, reload will assume it's ok to
17150 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17151 the vec_dupv4hi pattern. */
17152 if (GET_MODE_SIZE (from) < 4)
17153 return true;
17154
17155 /* Vector registers do not support subreg with nonzero offsets, which
17156 are otherwise valid for integer registers. Since we can't see
17157 whether we have a nonzero offset from here, prohibit all
17158 nonparadoxical subregs changing size. */
17159 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17160 return true;
17161 }
17162
17163 return false;
17164 }
17165
17166 /* Return the cost of moving data from a register in class CLASS1 to
17167 one in class CLASS2.
17168
17169 It is not required that the cost always equal 2 when CLASS1 is the same as
17170 CLASS2; on some machines it is expensive to move between registers if they
17171 are not general registers. */
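/* For example, a DImode move between GENERAL_REGS and SSE_REGS on a 32-bit
   target needs secondary memory (see above), so its cost is 1 plus both
   symmetric MEMORY_MOVE_COSTs, plus another 20 when the source class needs
   more hard registers than the destination (two GPR stores feeding one SSE
   load).  */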
17172
17173 int
17174 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17175 enum reg_class class2)
17176 {
17177 /* In case we require secondary memory, compute the cost of the store
17178 followed by a load. In order to avoid bad register allocation choices,
17179 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17180
17181 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17182 {
17183 int cost = 1;
17184
17185 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17186 MEMORY_MOVE_COST (mode, class1, 1));
17187 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17188 MEMORY_MOVE_COST (mode, class2, 1));
17189
17190 /* When copying from a general purpose register we may emit multiple
17191 stores followed by a single load, causing a memory size mismatch stall.
17192 Count this as an arbitrarily high cost of 20. */
17193 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17194 cost += 20;
17195
17196 /* In the case of FP/MMX moves, the registers actually overlap, and we
17197 have to switch modes in order to treat them differently. */
17198 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17199 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17200 cost += 20;
17201
17202 return cost;
17203 }
17204
17205 /* Moves between SSE/MMX and integer unit are expensive. */
17206 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17207 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17208 return ix86_cost->mmxsse_to_integer;
17209 if (MAYBE_FLOAT_CLASS_P (class1))
17210 return ix86_cost->fp_move;
17211 if (MAYBE_SSE_CLASS_P (class1))
17212 return ix86_cost->sse_move;
17213 if (MAYBE_MMX_CLASS_P (class1))
17214 return ix86_cost->mmx_move;
17215 return 2;
17216 }
17217
17218 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17219
17220 bool
17221 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17222 {
17223 /* Flags and only flags can only hold CCmode values. */
17224 if (CC_REGNO_P (regno))
17225 return GET_MODE_CLASS (mode) == MODE_CC;
17226 if (GET_MODE_CLASS (mode) == MODE_CC
17227 || GET_MODE_CLASS (mode) == MODE_RANDOM
17228 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17229 return 0;
17230 if (FP_REGNO_P (regno))
17231 return VALID_FP_MODE_P (mode);
17232 if (SSE_REGNO_P (regno))
17233 {
17234 /* We implement the move patterns for all vector modes into and
17235 out of SSE registers, even when no operation instructions
17236 are available. */
17237 return (VALID_SSE_REG_MODE (mode)
17238 || VALID_SSE2_REG_MODE (mode)
17239 || VALID_MMX_REG_MODE (mode)
17240 || VALID_MMX_REG_MODE_3DNOW (mode));
17241 }
17242 if (MMX_REGNO_P (regno))
17243 {
17244 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17245 so if the register is available at all, then we can move data of
17246 the given mode into or out of it. */
17247 return (VALID_MMX_REG_MODE (mode)
17248 || VALID_MMX_REG_MODE_3DNOW (mode));
17249 }
17250
17251 if (mode == QImode)
17252 {
17253 /* Take care with QImode values - they can be in non-QI regs,
17254 but then they do cause partial register stalls. */
17255 if (regno < 4 || TARGET_64BIT)
17256 return 1;
17257 if (!TARGET_PARTIAL_REG_STALL)
17258 return 1;
17259 return reload_in_progress || reload_completed;
17260 }
17261 /* We handle both integers and floats in the general purpose registers. */
17262 else if (VALID_INT_MODE_P (mode))
17263 return 1;
17264 else if (VALID_FP_MODE_P (mode))
17265 return 1;
17266 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17267 on to use that value in smaller contexts, this can easily force a
17268 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17269 supporting DImode, allow it. */
17270 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17271 return 1;
17272
17273 return 0;
17274 }
17275
17276 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17277 tieable integer mode. */
17278
17279 static bool
17280 ix86_tieable_integer_mode_p (enum machine_mode mode)
17281 {
17282 switch (mode)
17283 {
17284 case HImode:
17285 case SImode:
17286 return true;
17287
17288 case QImode:
17289 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17290
17291 case DImode:
17292 return TARGET_64BIT;
17293
17294 default:
17295 return false;
17296 }
17297 }
17298
17299 /* Return true if MODE1 is accessible in a register that can hold MODE2
17300 without copying. That is, all register classes that can hold MODE2
17301 can also hold MODE1. */
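/* For example, SFmode is tieable with a register holding DFmode or XFmode,
   HImode and SImode always tie with each other, QImode only ties when
   partial register stalls are not a concern, and 16-byte vector modes tie
   with one another through the SSE register check below.  */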
17302
17303 bool
17304 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17305 {
17306 if (mode1 == mode2)
17307 return true;
17308
17309 if (ix86_tieable_integer_mode_p (mode1)
17310 && ix86_tieable_integer_mode_p (mode2))
17311 return true;
17312
17313 /* MODE2 being XFmode implies fp stack or general regs, which means we
17314 can tie any smaller floating point modes to it. Note that we do not
17315 tie this with TFmode. */
17316 if (mode2 == XFmode)
17317 return mode1 == SFmode || mode1 == DFmode;
17318
17319 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17320 that we can tie it with SFmode. */
17321 if (mode2 == DFmode)
17322 return mode1 == SFmode;
17323
17324 /* If MODE2 is only appropriate for an SSE register, then tie with
17325 any other mode acceptable to SSE registers. */
17326 if (GET_MODE_SIZE (mode2) >= 8
17327 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17328 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17329
17330 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17331 with any other mode acceptable to MMX registers. */
17332 if (GET_MODE_SIZE (mode2) == 8
17333 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17334 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17335
17336 return false;
17337 }
17338
17339 /* Return the cost of moving data of mode M between a
17340 register and memory. A value of 2 is the default; this cost is
17341 relative to those in `REGISTER_MOVE_COST'.
17342
17343 If moving between registers and memory is more expensive than
17344 between two registers, you should define this macro to express the
17345 relative cost.
17346
17347 Also model the increased cost of moving QImode registers in non-Q_REGS
17348 classes.
17349 */
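/* For example, an SFmode load into FLOAT_REGS costs ix86_cost->fp_load[0],
   a DFmode store from SSE_REGS costs ix86_cost->sse_store[1], and a QImode
   load into a class without byte registers is charged as a movzbl load.  */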
17350 int
17351 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17352 {
17353 if (FLOAT_CLASS_P (class))
17354 {
17355 int index;
17356 switch (mode)
17357 {
17358 case SFmode:
17359 index = 0;
17360 break;
17361 case DFmode:
17362 index = 1;
17363 break;
17364 case XFmode:
17365 index = 2;
17366 break;
17367 default:
17368 return 100;
17369 }
17370 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17371 }
17372 if (SSE_CLASS_P (class))
17373 {
17374 int index;
17375 switch (GET_MODE_SIZE (mode))
17376 {
17377 case 4:
17378 index = 0;
17379 break;
17380 case 8:
17381 index = 1;
17382 break;
17383 case 16:
17384 index = 2;
17385 break;
17386 default:
17387 return 100;
17388 }
17389 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17390 }
17391 if (MMX_CLASS_P (class))
17392 {
17393 int index;
17394 switch (GET_MODE_SIZE (mode))
17395 {
17396 case 4:
17397 index = 0;
17398 break;
17399 case 8:
17400 index = 1;
17401 break;
17402 default:
17403 return 100;
17404 }
17405 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17406 }
17407 switch (GET_MODE_SIZE (mode))
17408 {
17409 case 1:
17410 if (in)
17411 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17412 : ix86_cost->movzbl_load);
17413 else
17414 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17415 : ix86_cost->int_store[0] + 4);
17416 break;
17417 case 2:
17418 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17419 default:
17420 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17421 if (mode == TFmode)
17422 mode = XFmode;
17423 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17424 * (((int) GET_MODE_SIZE (mode)
17425 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17426 }
17427 }
17428
17429 /* Compute a (partial) cost for rtx X. Return true if the complete
17430 cost has been computed, and false if subexpressions should be
17431 scanned. In either case, *TOTAL contains the cost result. */
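/* For example, (plus:SI (mult:SI (reg) (const_int 4)) (reg)) is costed
   below as a single ix86_cost->lea plus the cost of its operands, since it
   maps onto a scaled index address, rather than as a shift and an add.  */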
17432
17433 static bool
17434 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17435 {
17436 enum machine_mode mode = GET_MODE (x);
17437
17438 switch (code)
17439 {
17440 case CONST_INT:
17441 case CONST:
17442 case LABEL_REF:
17443 case SYMBOL_REF:
17444 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17445 *total = 3;
17446 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17447 *total = 2;
17448 else if (flag_pic && SYMBOLIC_CONST (x)
17449 && (!TARGET_64BIT
17450 || (GET_CODE (x) != LABEL_REF
17451 && (GET_CODE (x) != SYMBOL_REF
17452 || !SYMBOL_REF_LOCAL_P (x)))))
17453 *total = 1;
17454 else
17455 *total = 0;
17456 return true;
17457
17458 case CONST_DOUBLE:
17459 if (mode == VOIDmode)
17460 *total = 0;
17461 else
17462 switch (standard_80387_constant_p (x))
17463 {
17464 case 1: /* 0.0 */
17465 *total = 1;
17466 break;
17467 default: /* Other constants */
17468 *total = 2;
17469 break;
17470 case 0:
17471 case -1:
17472 /* Start with (MEM (SYMBOL_REF)), since that's where
17473 it'll probably end up. Add a penalty for size. */
17474 *total = (COSTS_N_INSNS (1)
17475 + (flag_pic != 0 && !TARGET_64BIT)
17476 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17477 break;
17478 }
17479 return true;
17480
17481 case ZERO_EXTEND:
17482 /* Zero extension is often completely free on x86_64, so make
17483 it as cheap as possible. */
17484 if (TARGET_64BIT && mode == DImode
17485 && GET_MODE (XEXP (x, 0)) == SImode)
17486 *total = 1;
17487 else if (TARGET_ZERO_EXTEND_WITH_AND)
17488 *total = ix86_cost->add;
17489 else
17490 *total = ix86_cost->movzx;
17491 return false;
17492
17493 case SIGN_EXTEND:
17494 *total = ix86_cost->movsx;
17495 return false;
17496
17497 case ASHIFT:
17498 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17499 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17500 {
17501 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17502 if (value == 1)
17503 {
17504 *total = ix86_cost->add;
17505 return false;
17506 }
17507 if ((value == 2 || value == 3)
17508 && ix86_cost->lea <= ix86_cost->shift_const)
17509 {
17510 *total = ix86_cost->lea;
17511 return false;
17512 }
17513 }
17514 /* FALLTHRU */
17515
17516 case ROTATE:
17517 case ASHIFTRT:
17518 case LSHIFTRT:
17519 case ROTATERT:
17520 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17521 {
17522 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17523 {
17524 if (INTVAL (XEXP (x, 1)) > 32)
17525 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17526 else
17527 *total = ix86_cost->shift_const * 2;
17528 }
17529 else
17530 {
17531 if (GET_CODE (XEXP (x, 1)) == AND)
17532 *total = ix86_cost->shift_var * 2;
17533 else
17534 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17535 }
17536 }
17537 else
17538 {
17539 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17540 *total = ix86_cost->shift_const;
17541 else
17542 *total = ix86_cost->shift_var;
17543 }
17544 return false;
17545
17546 case MULT:
17547 if (FLOAT_MODE_P (mode))
17548 {
17549 *total = ix86_cost->fmul;
17550 return false;
17551 }
17552 else
17553 {
17554 rtx op0 = XEXP (x, 0);
17555 rtx op1 = XEXP (x, 1);
17556 int nbits;
17557 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17558 {
17559 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
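/* Count the set bits of the constant multiplier; clearing the lowest
   set bit each iteration (value &= value - 1) visits each one bit once.  */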
17560 for (nbits = 0; value != 0; value &= value - 1)
17561 nbits++;
17562 }
17563 else
17564 /* This is arbitrary. */
17565 nbits = 7;
17566
17567 /* Compute costs correctly for widening multiplication. */
17568 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17569 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17570 == GET_MODE_SIZE (mode))
17571 {
17572 int is_mulwiden = 0;
17573 enum machine_mode inner_mode = GET_MODE (op0);
17574
17575 if (GET_CODE (op0) == GET_CODE (op1))
17576 is_mulwiden = 1, op1 = XEXP (op1, 0);
17577 else if (GET_CODE (op1) == CONST_INT)
17578 {
17579 if (GET_CODE (op0) == SIGN_EXTEND)
17580 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17581 == INTVAL (op1);
17582 else
17583 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17584 }
17585
17586 if (is_mulwiden)
17587 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17588 }
17589
17590 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17591 + nbits * ix86_cost->mult_bit
17592 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17593
17594 return true;
17595 }
17596
17597 case DIV:
17598 case UDIV:
17599 case MOD:
17600 case UMOD:
17601 if (FLOAT_MODE_P (mode))
17602 *total = ix86_cost->fdiv;
17603 else
17604 *total = ix86_cost->divide[MODE_INDEX (mode)];
17605 return false;
17606
17607 case PLUS:
17608 if (FLOAT_MODE_P (mode))
17609 *total = ix86_cost->fadd;
17610 else if (GET_MODE_CLASS (mode) == MODE_INT
17611 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17612 {
17613 if (GET_CODE (XEXP (x, 0)) == PLUS
17614 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17615 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17616 && CONSTANT_P (XEXP (x, 1)))
17617 {
17618 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17619 if (val == 2 || val == 4 || val == 8)
17620 {
17621 *total = ix86_cost->lea;
17622 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17623 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17624 outer_code);
17625 *total += rtx_cost (XEXP (x, 1), outer_code);
17626 return true;
17627 }
17628 }
17629 else if (GET_CODE (XEXP (x, 0)) == MULT
17630 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17631 {
17632 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17633 if (val == 2 || val == 4 || val == 8)
17634 {
17635 *total = ix86_cost->lea;
17636 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17637 *total += rtx_cost (XEXP (x, 1), outer_code);
17638 return true;
17639 }
17640 }
17641 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17642 {
17643 *total = ix86_cost->lea;
17644 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17645 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17646 *total += rtx_cost (XEXP (x, 1), outer_code);
17647 return true;
17648 }
17649 }
17650 /* FALLTHRU */
17651
17652 case MINUS:
17653 if (FLOAT_MODE_P (mode))
17654 {
17655 *total = ix86_cost->fadd;
17656 return false;
17657 }
17658 /* FALLTHRU */
17659
17660 case AND:
17661 case IOR:
17662 case XOR:
17663 if (!TARGET_64BIT && mode == DImode)
17664 {
17665 *total = (ix86_cost->add * 2
17666 + (rtx_cost (XEXP (x, 0), outer_code)
17667 << (GET_MODE (XEXP (x, 0)) != DImode))
17668 + (rtx_cost (XEXP (x, 1), outer_code)
17669 << (GET_MODE (XEXP (x, 1)) != DImode)));
17670 return true;
17671 }
17672 /* FALLTHRU */
17673
17674 case NEG:
17675 if (FLOAT_MODE_P (mode))
17676 {
17677 *total = ix86_cost->fchs;
17678 return false;
17679 }
17680 /* FALLTHRU */
17681
17682 case NOT:
17683 if (!TARGET_64BIT && mode == DImode)
17684 *total = ix86_cost->add * 2;
17685 else
17686 *total = ix86_cost->add;
17687 return false;
17688
17689 case COMPARE:
17690 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17691 && XEXP (XEXP (x, 0), 1) == const1_rtx
17692 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17693 && XEXP (x, 1) == const0_rtx)
17694 {
17695 /* This kind of construct is implemented using test[bwl].
17696 Treat it as if we had an AND. */
17697 *total = (ix86_cost->add
17698 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17699 + rtx_cost (const1_rtx, outer_code));
17700 return true;
17701 }
17702 return false;
17703
17704 case FLOAT_EXTEND:
17705 if (!TARGET_SSE_MATH
17706 || mode == XFmode
17707 || (mode == DFmode && !TARGET_SSE2))
17708 *total = 0;
17709 return false;
17710
17711 case ABS:
17712 if (FLOAT_MODE_P (mode))
17713 *total = ix86_cost->fabs;
17714 return false;
17715
17716 case SQRT:
17717 if (FLOAT_MODE_P (mode))
17718 *total = ix86_cost->fsqrt;
17719 return false;
17720
17721 case UNSPEC:
17722 if (XINT (x, 1) == UNSPEC_TP)
17723 *total = 0;
17724 return false;
17725
17726 default:
17727 return false;
17728 }
17729 }
17730
17731 #if TARGET_MACHO
17732
17733 static int current_machopic_label_num;
17734
17735 /* Given a symbol name and its associated stub, write out the
17736 definition of the stub. */
17737
17738 void
17739 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17740 {
17741 unsigned int length;
17742 char *binder_name, *symbol_name, lazy_ptr_name[32];
17743 int label = ++current_machopic_label_num;
17744
17745 /* For 64-bit we shouldn't get here. */
17746 gcc_assert (!TARGET_64BIT);
17747
17748 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17749 symb = (*targetm.strip_name_encoding) (symb);
17750
17751 length = strlen (stub);
17752 binder_name = alloca (length + 32);
17753 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17754
17755 length = strlen (symb);
17756 symbol_name = alloca (length + 32);
17757 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17758
17759 sprintf (lazy_ptr_name, "L%d$lz", label);
17760
17761 if (MACHOPIC_PURE)
17762 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17763 else
17764 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17765
17766 fprintf (file, "%s:\n", stub);
17767 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17768
17769 if (MACHOPIC_PURE)
17770 {
17771 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17772 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17773 fprintf (file, "\tjmp\t*%%edx\n");
17774 }
17775 else
17776 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17777
17778 fprintf (file, "%s:\n", binder_name);
17779
17780 if (MACHOPIC_PURE)
17781 {
17782 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17783 fprintf (file, "\tpushl\t%%eax\n");
17784 }
17785 else
17786 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17787
17788 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17789
17790 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17791 fprintf (file, "%s:\n", lazy_ptr_name);
17792 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17793 fprintf (file, "\t.long %s\n", binder_name);
17794 }
17795
17796 void
17797 darwin_x86_file_end (void)
17798 {
17799 darwin_file_end ();
17800 ix86_file_end ();
17801 }
17802 #endif /* TARGET_MACHO */
17803
17804 /* Order the registers for register allocator. */
17805
17806 void
17807 x86_order_regs_for_local_alloc (void)
17808 {
17809 int pos = 0;
17810 int i;
17811
17812 /* First allocate the local general purpose registers. */
17813 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17814 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17815 reg_alloc_order [pos++] = i;
17816
17817 /* Global general purpose registers. */
17818 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17819 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17820 reg_alloc_order [pos++] = i;
17821
17822 /* x87 registers come first in case we are doing FP math
17823 using them. */
17824 if (!TARGET_SSE_MATH)
17825 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17826 reg_alloc_order [pos++] = i;
17827
17828 /* SSE registers. */
17829 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17830 reg_alloc_order [pos++] = i;
17831 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17832 reg_alloc_order [pos++] = i;
17833
17834 /* x87 registers. */
17835 if (TARGET_SSE_MATH)
17836 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17837 reg_alloc_order [pos++] = i;
17838
17839 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17840 reg_alloc_order [pos++] = i;
17841
17842 /* Initialize the rest of the array, as we do not allocate some registers
17843 at all. */
17844 while (pos < FIRST_PSEUDO_REGISTER)
17845 reg_alloc_order [pos++] = 0;
17846 }
17847
17848 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17849 struct attribute_spec.handler. */
17850 static tree
17851 ix86_handle_struct_attribute (tree *node, tree name,
17852 tree args ATTRIBUTE_UNUSED,
17853 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17854 {
17855 tree *type = NULL;
17856 if (DECL_P (*node))
17857 {
17858 if (TREE_CODE (*node) == TYPE_DECL)
17859 type = &TREE_TYPE (*node);
17860 }
17861 else
17862 type = node;
17863
17864 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17865 || TREE_CODE (*type) == UNION_TYPE)))
17866 {
17867 warning (OPT_Wattributes, "%qs attribute ignored",
17868 IDENTIFIER_POINTER (name));
17869 *no_add_attrs = true;
17870 }
17871
17872 else if ((is_attribute_p ("ms_struct", name)
17873 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17874 || ((is_attribute_p ("gcc_struct", name)
17875 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17876 {
17877 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17878 IDENTIFIER_POINTER (name));
17879 *no_add_attrs = true;
17880 }
17881
17882 return NULL_TREE;
17883 }
17884
17885 static bool
17886 ix86_ms_bitfield_layout_p (tree record_type)
17887 {
17888 return (TARGET_MS_BITFIELD_LAYOUT &&
17889 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17890 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17891 }
17892
17893 /* Returns an expression indicating where the this parameter is
17894 located on entry to the FUNCTION. */
17895
17896 static rtx
17897 x86_this_parameter (tree function)
17898 {
17899 tree type = TREE_TYPE (function);
17900
17901 if (TARGET_64BIT)
17902 {
17903 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17904 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17905 }
17906
17907 if (ix86_function_regparm (type, function) > 0)
17908 {
17909 tree parm;
17910
17911 parm = TYPE_ARG_TYPES (type);
17912 /* Figure out whether or not the function has a variable number of
17913 arguments. */
17914 for (; parm; parm = TREE_CHAIN (parm))
17915 if (TREE_VALUE (parm) == void_type_node)
17916 break;
17917 /* If not, the this parameter is in the first argument. */
17918 if (parm)
17919 {
17920 int regno = 0;
17921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17922 regno = 2;
17923 return gen_rtx_REG (SImode, regno);
17924 }
17925 }
17926
17927 if (aggregate_value_p (TREE_TYPE (type), type))
17928 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17929 else
17930 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17931 }
17932
17933 /* Determine whether x86_output_mi_thunk can succeed. */
17934
17935 static bool
17936 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17937 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17938 HOST_WIDE_INT vcall_offset, tree function)
17939 {
17940 /* 64-bit can handle anything. */
17941 if (TARGET_64BIT)
17942 return true;
17943
17944 /* For 32-bit, everything's fine if we have one free register. */
17945 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17946 return true;
17947
17948 /* Need a free register for vcall_offset. */
17949 if (vcall_offset)
17950 return false;
17951
17952 /* Need a free register for GOT references. */
17953 if (flag_pic && !(*targetm.binds_local_p) (function))
17954 return false;
17955
17956 /* Otherwise ok. */
17957 return true;
17958 }
17959
17960 /* Output the assembler code for a thunk function. THUNK_DECL is the
17961 declaration for the thunk function itself, FUNCTION is the decl for
17962 the target function. DELTA is an immediate constant offset to be
17963 added to THIS. If VCALL_OFFSET is nonzero, the word at
17964 *(*this + vcall_offset) should be added to THIS. */
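/* Roughly, for a 32-bit non-PIC thunk with only a DELTA adjustment this
   emits
       addl $DELTA, 4(%esp)
       jmp  function
   while the vcall path first loads *this into a scratch register and adds
   the vtable entry at VCALL_OFFSET before the jump.  */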
17965
17966 static void
17967 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17968 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17969 HOST_WIDE_INT vcall_offset, tree function)
17970 {
17971 rtx xops[3];
17972 rtx this = x86_this_parameter (function);
17973 rtx this_reg, tmp;
17974
17975 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17976 pull it in now and let DELTA benefit. */
17977 if (REG_P (this))
17978 this_reg = this;
17979 else if (vcall_offset)
17980 {
17981 /* Put the this parameter into %eax. */
17982 xops[0] = this;
17983 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17984 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17985 }
17986 else
17987 this_reg = NULL_RTX;
17988
17989 /* Adjust the this parameter by a fixed constant. */
17990 if (delta)
17991 {
17992 xops[0] = GEN_INT (delta);
17993 xops[1] = this_reg ? this_reg : this;
17994 if (TARGET_64BIT)
17995 {
17996 if (!x86_64_general_operand (xops[0], DImode))
17997 {
17998 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17999 xops[1] = tmp;
18000 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18001 xops[0] = tmp;
18002 xops[1] = this;
18003 }
18004 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18005 }
18006 else
18007 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18008 }
18009
18010 /* Adjust the this parameter by a value stored in the vtable. */
18011 if (vcall_offset)
18012 {
18013 if (TARGET_64BIT)
18014 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18015 else
18016 {
18017 int tmp_regno = 2 /* ECX */;
18018 if (lookup_attribute ("fastcall",
18019 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18020 tmp_regno = 0 /* EAX */;
18021 tmp = gen_rtx_REG (SImode, tmp_regno);
18022 }
18023
18024 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18025 xops[1] = tmp;
18026 if (TARGET_64BIT)
18027 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18028 else
18029 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18030
18031 /* Adjust the this parameter. */
18032 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18033 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18034 {
18035 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18036 xops[0] = GEN_INT (vcall_offset);
18037 xops[1] = tmp2;
18038 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18039 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18040 }
18041 xops[1] = this_reg;
18042 if (TARGET_64BIT)
18043 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18044 else
18045 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18046 }
18047
18048 /* If necessary, drop THIS back to its stack slot. */
18049 if (this_reg && this_reg != this)
18050 {
18051 xops[0] = this_reg;
18052 xops[1] = this;
18053 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18054 }
18055
18056 xops[0] = XEXP (DECL_RTL (function), 0);
18057 if (TARGET_64BIT)
18058 {
18059 if (!flag_pic || (*targetm.binds_local_p) (function))
18060 output_asm_insn ("jmp\t%P0", xops);
18061 else
18062 {
18063 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18064 tmp = gen_rtx_CONST (Pmode, tmp);
18065 tmp = gen_rtx_MEM (QImode, tmp);
18066 xops[0] = tmp;
18067 output_asm_insn ("jmp\t%A0", xops);
18068 }
18069 }
18070 else
18071 {
18072 if (!flag_pic || (*targetm.binds_local_p) (function))
18073 output_asm_insn ("jmp\t%P0", xops);
18074 else
18075 #if TARGET_MACHO
18076 if (TARGET_MACHO)
18077 {
18078 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18079 tmp = (gen_rtx_SYMBOL_REF
18080 (Pmode,
18081 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18082 tmp = gen_rtx_MEM (QImode, tmp);
18083 xops[0] = tmp;
18084 output_asm_insn ("jmp\t%0", xops);
18085 }
18086 else
18087 #endif /* TARGET_MACHO */
18088 {
18089 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18090 output_set_got (tmp, NULL_RTX);
18091
18092 xops[1] = tmp;
18093 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18094 output_asm_insn ("jmp\t{*}%1", xops);
18095 }
18096 }
18097 }
18098
18099 static void
18100 x86_file_start (void)
18101 {
18102 default_file_start ();
18103 #if TARGET_MACHO
18104 darwin_file_start ();
18105 #endif
18106 if (X86_FILE_START_VERSION_DIRECTIVE)
18107 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18108 if (X86_FILE_START_FLTUSED)
18109 fputs ("\t.global\t__fltused\n", asm_out_file);
18110 if (ix86_asm_dialect == ASM_INTEL)
18111 fputs ("\t.intel_syntax\n", asm_out_file);
18112 }
18113
18114 int
18115 x86_field_alignment (tree field, int computed)
18116 {
18117 enum machine_mode mode;
18118 tree type = TREE_TYPE (field);
18119
18120 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18121 return computed;
18122 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18123 ? get_inner_array_type (type) : type);
18124 if (mode == DFmode || mode == DCmode
18125 || GET_MODE_CLASS (mode) == MODE_INT
18126 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18127 return MIN (32, computed);
18128 return computed;
18129 }
18130
18131 /* Output assembler code to FILE to increment profiler label # LABELNO
18132 for profiling a function entry. */
18133 void
18134 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18135 {
18136 if (TARGET_64BIT)
18137 if (flag_pic)
18138 {
18139 #ifndef NO_PROFILE_COUNTERS
18140 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18141 #endif
18142 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18143 }
18144 else
18145 {
18146 #ifndef NO_PROFILE_COUNTERS
18147 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18148 #endif
18149 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18150 }
18151 else if (flag_pic)
18152 {
18153 #ifndef NO_PROFILE_COUNTERS
18154 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18155 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18156 #endif
18157 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18158 }
18159 else
18160 {
18161 #ifndef NO_PROFILE_COUNTERS
18162 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18163 PROFILE_COUNT_REGISTER);
18164 #endif
18165 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18166 }
18167 }
18168
18169 /* We don't have exact information about the insn sizes, but we may assume
18170 quite safely that we are informed about all 1 byte insns and memory
18171 address sizes. This is enough to eliminate unnecessary padding in
18172 99% of cases. */
18173
18174 static int
18175 min_insn_size (rtx insn)
18176 {
18177 int l = 0;
18178
18179 if (!INSN_P (insn) || !active_insn_p (insn))
18180 return 0;
18181
18182 /* Discard alignments we've emitted, and jump instructions. */
18183 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18184 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18185 return 0;
18186 if (GET_CODE (insn) == JUMP_INSN
18187 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18188 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18189 return 0;
18190
18191 /* Important case - calls are always 5 bytes.
18192 It is common to have many calls in a row. */
18193 if (GET_CODE (insn) == CALL_INSN
18194 && symbolic_reference_mentioned_p (PATTERN (insn))
18195 && !SIBLING_CALL_P (insn))
18196 return 5;
18197 if (get_attr_length (insn) <= 1)
18198 return 1;
18199
18200 /* For normal instructions we rely on the sizes of addresses and the
18201 presence of a symbol to require 4 bytes of encoding. This is not the
18202 case for jumps, where references are PC relative. */
18203 if (GET_CODE (insn) != JUMP_INSN)
18204 {
18205 l = get_attr_length_address (insn);
18206 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18207 l = 4;
18208 }
18209 if (l)
18210 return 1+l;
18211 else
18212 return 2;
18213 }
18214
18215 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18216 window. */
18217
18218 static void
18219 ix86_avoid_jump_misspredicts (void)
18220 {
18221 rtx insn, start = get_insns ();
18222 int nbytes = 0, njumps = 0;
18223 int isjump = 0;
18224
18225 /* Look for all minimal intervals of instructions containing 4 jumps.
18226 The intervals are bounded by START and INSN. NBYTES is the total
18227 size of the instructions in the interval, including INSN and not including
18228 START. When NBYTES is smaller than 16 bytes, it is possible
18229 that the ends of START and INSN fall in the same 16-byte page.
18230
18231 The smallest offset in the page at which INSN can start is the case where
18232 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
18233 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
18234 */
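/* For example, if the instructions between START and INSN (including INSN)
   total nbytes == 10 and INSN itself is 3 bytes, the code below emits an
   alignment of padsize = 15 - 10 + 3 = 8 bytes so that the fourth jump
   cannot land in the same 16-byte window.  */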
18235 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18236 {
18237
18238 nbytes += min_insn_size (insn);
18239 if (dump_file)
18240 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18241 INSN_UID (insn), min_insn_size (insn));
18242 if ((GET_CODE (insn) == JUMP_INSN
18243 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18244 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18245 || GET_CODE (insn) == CALL_INSN)
18246 njumps++;
18247 else
18248 continue;
18249
18250 while (njumps > 3)
18251 {
18252 start = NEXT_INSN (start);
18253 if ((GET_CODE (start) == JUMP_INSN
18254 && GET_CODE (PATTERN (start)) != ADDR_VEC
18255 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18256 || GET_CODE (start) == CALL_INSN)
18257 njumps--, isjump = 1;
18258 else
18259 isjump = 0;
18260 nbytes -= min_insn_size (start);
18261 }
18262 gcc_assert (njumps >= 0);
18263 if (dump_file)
18264 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18265 INSN_UID (start), INSN_UID (insn), nbytes);
18266
18267 if (njumps == 3 && isjump && nbytes < 16)
18268 {
18269 int padsize = 15 - nbytes + min_insn_size (insn);
18270
18271 if (dump_file)
18272 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18273 INSN_UID (insn), padsize);
18274 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18275 }
18276 }
18277 }
18278
18279 /* AMD Athlon works faster
18280 when RET is not the destination of a conditional jump or directly preceded
18281 by another jump instruction. We avoid the penalty by inserting a NOP just
18282 before the RET instructions in such cases. */
18283 static void
18284 ix86_pad_returns (void)
18285 {
18286 edge e;
18287 edge_iterator ei;
18288
18289 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18290 {
18291 basic_block bb = e->src;
18292 rtx ret = BB_END (bb);
18293 rtx prev;
18294 bool replace = false;
18295
18296 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18297 || !maybe_hot_bb_p (bb))
18298 continue;
18299 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18300 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18301 break;
18302 if (prev && GET_CODE (prev) == CODE_LABEL)
18303 {
18304 edge e;
18305 edge_iterator ei;
18306
18307 FOR_EACH_EDGE (e, ei, bb->preds)
18308 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18309 && !(e->flags & EDGE_FALLTHRU))
18310 replace = true;
18311 }
18312 if (!replace)
18313 {
18314 prev = prev_active_insn (ret);
18315 if (prev
18316 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18317 || GET_CODE (prev) == CALL_INSN))
18318 replace = true;
18319 /* Empty functions get a branch mispredict even when the jump destination
18320 is not visible to us. */
18321 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18322 replace = true;
18323 }
18324 if (replace)
18325 {
18326 emit_insn_before (gen_return_internal_long (), ret);
18327 delete_insn (ret);
18328 }
18329 }
18330 }
18331
18332 /* Implement machine specific optimizations. We implement padding of returns
18333 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
18334 static void
18335 ix86_reorg (void)
18336 {
18337 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18338 ix86_pad_returns ();
18339 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18340 ix86_avoid_jump_misspredicts ();
18341 }
18342
18343 /* Return nonzero when a QImode register that must be represented via a REX
18344 prefix is used. */
18345 bool
18346 x86_extended_QIreg_mentioned_p (rtx insn)
18347 {
18348 int i;
18349 extract_insn_cached (insn);
18350 for (i = 0; i < recog_data.n_operands; i++)
18351 if (REG_P (recog_data.operand[i])
18352 && REGNO (recog_data.operand[i]) >= 4)
18353 return true;
18354 return false;
18355 }
18356
18357 /* Return nonzero when P points to a register encoded via a REX prefix.
18358 Called via for_each_rtx. */
18359 static int
18360 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18361 {
18362 unsigned int regno;
18363 if (!REG_P (*p))
18364 return 0;
18365 regno = REGNO (*p);
18366 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18367 }
18368
18369 /* Return true when INSN mentions a register that must be encoded using a REX
18370 prefix. */
18371 bool
18372 x86_extended_reg_mentioned_p (rtx insn)
18373 {
18374 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18375 }
18376
18377 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18378 optabs would emit if we didn't have TFmode patterns. */
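/* Schematically, for an input X that is negative when viewed as a signed
   number, the code below computes F = (FP) ((X >> 1) | (X & 1)) and
   returns F + F, which rounds the same way as the exact unsigned value;
   non-negative inputs take the plain signed FLOAT path.  */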
18379
18380 void
18381 x86_emit_floatuns (rtx operands[2])
18382 {
18383 rtx neglab, donelab, i0, i1, f0, in, out;
18384 enum machine_mode mode, inmode;
18385
18386 inmode = GET_MODE (operands[1]);
18387 gcc_assert (inmode == SImode || inmode == DImode);
18388
18389 out = operands[0];
18390 in = force_reg (inmode, operands[1]);
18391 mode = GET_MODE (out);
18392 neglab = gen_label_rtx ();
18393 donelab = gen_label_rtx ();
18394 i1 = gen_reg_rtx (Pmode);
18395 f0 = gen_reg_rtx (mode);
18396
18397 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18398
18399 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18400 emit_jump_insn (gen_jump (donelab));
18401 emit_barrier ();
18402
18403 emit_label (neglab);
18404
18405 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18406 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18407 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18408 expand_float (f0, i0, 0);
18409 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18410
18411 emit_label (donelab);
18412 }
18413 \f
18414 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18415 with all elements equal to VAL. Return true if successful. */
18416
18417 static bool
18418 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18419 rtx target, rtx val)
18420 {
18421 enum machine_mode smode, wsmode, wvmode;
18422 rtx x;
18423
18424 switch (mode)
18425 {
18426 case V2SImode:
18427 case V2SFmode:
18428 if (!mmx_ok)
18429 return false;
18430 /* FALLTHRU */
18431
18432 case V2DFmode:
18433 case V2DImode:
18434 case V4SFmode:
18435 case V4SImode:
18436 val = force_reg (GET_MODE_INNER (mode), val);
18437 x = gen_rtx_VEC_DUPLICATE (mode, val);
18438 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18439 return true;
18440
18441 case V4HImode:
18442 if (!mmx_ok)
18443 return false;
18444 if (TARGET_SSE || TARGET_3DNOW_A)
18445 {
18446 val = gen_lowpart (SImode, val);
18447 x = gen_rtx_TRUNCATE (HImode, val);
18448 x = gen_rtx_VEC_DUPLICATE (mode, x);
18449 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18450 return true;
18451 }
18452 else
18453 {
18454 smode = HImode;
18455 wsmode = SImode;
18456 wvmode = V2SImode;
18457 goto widen;
18458 }
18459
18460 case V8QImode:
18461 if (!mmx_ok)
18462 return false;
18463 smode = QImode;
18464 wsmode = HImode;
18465 wvmode = V4HImode;
18466 goto widen;
18467 case V8HImode:
18468 if (TARGET_SSE2)
18469 {
18470 rtx tmp1, tmp2;
18471 /* Extend HImode to SImode using a paradoxical SUBREG. */
18472 tmp1 = gen_reg_rtx (SImode);
18473 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18474 /* Insert the SImode value as low element of V4SImode vector. */
18475 tmp2 = gen_reg_rtx (V4SImode);
18476 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18477 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18478 CONST0_RTX (V4SImode),
18479 const1_rtx);
18480 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18481 /* Cast the V4SImode vector back to a V8HImode vector. */
18482 tmp1 = gen_reg_rtx (V8HImode);
18483 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18484 /* Duplicate the low short through the whole low SImode word. */
18485 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18486 /* Cast the V8HImode vector back to a V4SImode vector. */
18487 tmp2 = gen_reg_rtx (V4SImode);
18488 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18489 /* Replicate the low element of the V4SImode vector. */
18490 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18491 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18492 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18493 return true;
18494 }
18495 smode = HImode;
18496 wsmode = SImode;
18497 wvmode = V4SImode;
18498 goto widen;
18499 case V16QImode:
18500 if (TARGET_SSE2)
18501 {
18502 rtx tmp1, tmp2;
18503 /* Extend QImode to SImode using a paradoxical SUBREG. */
18504 tmp1 = gen_reg_rtx (SImode);
18505 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18506 /* Insert the SImode value as low element of V4SImode vector. */
18507 tmp2 = gen_reg_rtx (V4SImode);
18508 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18509 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18510 CONST0_RTX (V4SImode),
18511 const1_rtx);
18512 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18513 /* Cast the V4SImode vector back to a V16QImode vector. */
18514 tmp1 = gen_reg_rtx (V16QImode);
18515 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18516 /* Duplicate the low byte through the whole low SImode word. */
18517 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18518 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18519 /* Cast the V16QImode vector back to a V4SImode vector. */
18520 tmp2 = gen_reg_rtx (V4SImode);
18521 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18522 /* Replicate the low element of the V4SImode vector. */
18523 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18524 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18525 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18526 return true;
18527 }
18528 smode = QImode;
18529 wsmode = HImode;
18530 wvmode = V8HImode;
18531 goto widen;
18532 widen:
18533 /* Replicate the value once into the next wider mode and recurse. */
18534 val = convert_modes (wsmode, smode, val, true);
18535 x = expand_simple_binop (wsmode, ASHIFT, val,
18536 GEN_INT (GET_MODE_BITSIZE (smode)),
18537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18538 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
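/* E.g. for a QImode VAL v this forms the HImode value (v << 8) | v, so the
   recursive call below only has to broadcast the wider element.  */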
18539
18540 x = gen_reg_rtx (wvmode);
18541 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18542 gcc_unreachable ();
18543 emit_move_insn (target, gen_lowpart (mode, x));
18544 return true;
18545
18546 default:
18547 return false;
18548 }
18549 }
18550
18551 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18552 whose ONE_VAR element is VAR, and other elements are zero. Return true
18553 if successful. */
18554
18555 static bool
18556 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18557 rtx target, rtx var, int one_var)
18558 {
18559 enum machine_mode vsimode;
18560 rtx new_target;
18561 rtx x, tmp;
18562
18563 switch (mode)
18564 {
18565 case V2SFmode:
18566 case V2SImode:
18567 if (!mmx_ok)
18568 return false;
18569 /* FALLTHRU */
18570
18571 case V2DFmode:
18572 case V2DImode:
18573 if (one_var != 0)
18574 return false;
18575 var = force_reg (GET_MODE_INNER (mode), var);
18576 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18577 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18578 return true;
18579
18580 case V4SFmode:
18581 case V4SImode:
18582 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18583 new_target = gen_reg_rtx (mode);
18584 else
18585 new_target = target;
18586 var = force_reg (GET_MODE_INNER (mode), var);
18587 x = gen_rtx_VEC_DUPLICATE (mode, var);
18588 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18589 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18590 if (one_var != 0)
18591 {
18592 /* We need to shuffle the value to the correct position, so
18593 create a new pseudo to store the intermediate result. */
18594
18595 /* With SSE2, we can use the integer shuffle insns. */
18596 if (mode != V4SFmode && TARGET_SSE2)
18597 {
18598 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18599 GEN_INT (1),
18600 GEN_INT (one_var == 1 ? 0 : 1),
18601 GEN_INT (one_var == 2 ? 0 : 1),
18602 GEN_INT (one_var == 3 ? 0 : 1)));
18603 if (target != new_target)
18604 emit_move_insn (target, new_target);
18605 return true;
18606 }
18607
18608 /* Otherwise convert the intermediate result to V4SFmode and
18609 use the SSE1 shuffle instructions. */
18610 if (mode != V4SFmode)
18611 {
18612 tmp = gen_reg_rtx (V4SFmode);
18613 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18614 }
18615 else
18616 tmp = new_target;
18617
18618 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18619 GEN_INT (1),
18620 GEN_INT (one_var == 1 ? 0 : 1),
18621 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18622 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18623
18624 if (mode != V4SFmode)
18625 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18626 else if (tmp != target)
18627 emit_move_insn (target, tmp);
18628 }
18629 else if (target != new_target)
18630 emit_move_insn (target, new_target);
18631 return true;
18632
18633 case V8HImode:
18634 case V16QImode:
18635 vsimode = V4SImode;
18636 goto widen;
18637 case V4HImode:
18638 case V8QImode:
18639 if (!mmx_ok)
18640 return false;
18641 vsimode = V2SImode;
18642 goto widen;
18643 widen:
18644 if (one_var != 0)
18645 return false;
18646
18647 /* Zero extend the variable element to SImode and recurse. */
18648 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18649
18650 x = gen_reg_rtx (vsimode);
18651 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18652 var, one_var))
18653 gcc_unreachable ();
18654
18655 emit_move_insn (target, gen_lowpart (mode, x));
18656 return true;
18657
18658 default:
18659 return false;
18660 }
18661 }
18662
18663 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18664 consisting of the values in VALS. It is known that all elements
18665 except ONE_VAR are constants. Return true if successful. */
18666
18667 static bool
18668 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18669 rtx target, rtx vals, int one_var)
18670 {
18671 rtx var = XVECEXP (vals, 0, one_var);
18672 enum machine_mode wmode;
18673 rtx const_vec, x;
18674
18675 const_vec = copy_rtx (vals);
18676 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18677 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18678
18679 switch (mode)
18680 {
18681 case V2DFmode:
18682 case V2DImode:
18683 case V2SFmode:
18684 case V2SImode:
18685 /* For the two element vectors, it's just as easy to use
18686 the general case. */
18687 return false;
18688
18689 case V4SFmode:
18690 case V4SImode:
18691 case V8HImode:
18692 case V4HImode:
18693 break;
18694
18695 case V16QImode:
18696 wmode = V8HImode;
18697 goto widen;
18698 case V8QImode:
18699 wmode = V4HImode;
18700 goto widen;
18701 widen:
18702 /* There's no way to set one QImode entry easily. Combine
18703 the variable value with its adjacent constant value, and
18704 promote to an HImode set. */
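/* E.g. with ONE_VAR == 3 the constant byte at index 2 becomes the low
   half and the variable byte the high half of an HImode element, which is
   then stored at HImode position 1.  */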
18705 x = XVECEXP (vals, 0, one_var ^ 1);
18706 if (one_var & 1)
18707 {
18708 var = convert_modes (HImode, QImode, var, true);
18709 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18710 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18711 x = GEN_INT (INTVAL (x) & 0xff);
18712 }
18713 else
18714 {
18715 var = convert_modes (HImode, QImode, var, true);
18716 x = gen_int_mode (INTVAL (x) << 8, HImode);
18717 }
18718 if (x != const0_rtx)
18719 var = expand_simple_binop (HImode, IOR, var, x, var,
18720 1, OPTAB_LIB_WIDEN);
18721
18722 x = gen_reg_rtx (wmode);
18723 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18724 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18725
18726 emit_move_insn (target, gen_lowpart (mode, x));
18727 return true;
18728
18729 default:
18730 return false;
18731 }
18732
18733 emit_move_insn (target, const_vec);
18734 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18735 return true;
18736 }
18737
18738 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18739 all values variable, and none identical. */
18740
18741 static void
18742 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18743 rtx target, rtx vals)
18744 {
18745 enum machine_mode half_mode = GET_MODE_INNER (mode);
18746 rtx op0 = NULL, op1 = NULL;
18747 bool use_vec_concat = false;
18748
18749 switch (mode)
18750 {
18751 case V2SFmode:
18752 case V2SImode:
18753 if (!mmx_ok && !TARGET_SSE)
18754 break;
18755 /* FALLTHRU */
18756
18757 case V2DFmode:
18758 case V2DImode:
18759 /* For the two element vectors, we always implement VEC_CONCAT. */
18760 op0 = XVECEXP (vals, 0, 0);
18761 op1 = XVECEXP (vals, 0, 1);
18762 use_vec_concat = true;
18763 break;
18764
18765 case V4SFmode:
18766 half_mode = V2SFmode;
18767 goto half;
18768 case V4SImode:
18769 half_mode = V2SImode;
18770 goto half;
18771 half:
18772 {
18773 rtvec v;
18774
18775 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18776 Recurse to load the two halves. */
18777
18778 op0 = gen_reg_rtx (half_mode);
18779 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18780 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18781
18782 op1 = gen_reg_rtx (half_mode);
18783 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18784 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18785
18786 use_vec_concat = true;
18787 }
18788 break;
18789
18790 case V8HImode:
18791 case V16QImode:
18792 case V4HImode:
18793 case V8QImode:
18794 break;
18795
18796 default:
18797 gcc_unreachable ();
18798 }
18799
18800 if (use_vec_concat)
18801 {
18802 if (!register_operand (op0, half_mode))
18803 op0 = force_reg (half_mode, op0);
18804 if (!register_operand (op1, half_mode))
18805 op1 = force_reg (half_mode, op1);
18806
18807 emit_insn (gen_rtx_SET (VOIDmode, target,
18808 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18809 }
18810 else
18811 {
18812 int i, j, n_elts, n_words, n_elt_per_word;
18813 enum machine_mode inner_mode;
18814 rtx words[4], shift;
18815
18816 inner_mode = GET_MODE_INNER (mode);
18817 n_elts = GET_MODE_NUNITS (mode);
18818 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18819 n_elt_per_word = n_elts / n_words;
18820 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18821
18822 for (i = 0; i < n_words; ++i)
18823 {
18824 rtx word = NULL_RTX;
18825
18826 for (j = 0; j < n_elt_per_word; ++j)
18827 {
18828 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18829 elt = convert_modes (word_mode, inner_mode, elt, true);
18830
18831 if (j == 0)
18832 word = elt;
18833 else
18834 {
18835 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18836 word, 1, OPTAB_LIB_WIDEN);
18837 word = expand_simple_binop (word_mode, IOR, word, elt,
18838 word, 1, OPTAB_LIB_WIDEN);
18839 }
18840 }
18841
18842 words[i] = word;
18843 }
18844
18845 if (n_words == 1)
18846 emit_move_insn (target, gen_lowpart (mode, words[0]));
18847 else if (n_words == 2)
18848 {
18849 rtx tmp = gen_reg_rtx (mode);
18850 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18851 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18852 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18853 emit_move_insn (target, tmp);
18854 }
18855 else if (n_words == 4)
18856 {
18857 rtx tmp = gen_reg_rtx (V4SImode);
18858 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18859 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18860 emit_move_insn (target, gen_lowpart (mode, tmp));
18861 }
18862 else
18863 gcc_unreachable ();
18864 }
18865 }
18866
18867 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18868 instructions unless MMX_OK is true. */
18869
18870 void
18871 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18872 {
18873 enum machine_mode mode = GET_MODE (target);
18874 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18875 int n_elts = GET_MODE_NUNITS (mode);
18876 int n_var = 0, one_var = -1;
18877 bool all_same = true, all_const_zero = true;
18878 int i;
18879 rtx x;
18880
18881 for (i = 0; i < n_elts; ++i)
18882 {
18883 x = XVECEXP (vals, 0, i);
18884 if (!CONSTANT_P (x))
18885 n_var++, one_var = i;
18886 else if (x != CONST0_RTX (inner_mode))
18887 all_const_zero = false;
18888 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18889 all_same = false;
18890 }
18891
18892 /* Constants are best loaded from the constant pool. */
18893 if (n_var == 0)
18894 {
18895 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18896 return;
18897 }
18898
18899 /* If all values are identical, broadcast the value. */
18900 if (all_same
18901 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18902 XVECEXP (vals, 0, 0)))
18903 return;
18904
18905 /* Values where only one field is non-constant are best loaded from
18906 the pool and overwritten via a move later. */
18907 if (n_var == 1)
18908 {
18909 if (all_const_zero
18910 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18911 XVECEXP (vals, 0, one_var),
18912 one_var))
18913 return;
18914
18915 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18916 return;
18917 }
18918
18919 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18920 }
18921
18922 void
18923 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18924 {
18925 enum machine_mode mode = GET_MODE (target);
18926 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18927 bool use_vec_merge = false;
18928 rtx tmp;
18929
18930 switch (mode)
18931 {
18932 case V2SFmode:
18933 case V2SImode:
18934 if (mmx_ok)
18935 {
18936 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18937 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18938 if (elt == 0)
18939 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18940 else
18941 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18942 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18943 return;
18944 }
18945 break;
18946
18947 case V2DFmode:
18948 case V2DImode:
18949 {
18950 rtx op0, op1;
18951
18952 /* For the two element vectors, we implement a VEC_CONCAT with
18953 the extraction of the other element. */
18954
18955 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18956 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18957
18958 if (elt == 0)
18959 op0 = val, op1 = tmp;
18960 else
18961 op0 = tmp, op1 = val;
18962
18963 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18964 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18965 }
18966 return;
18967
18968 case V4SFmode:
18969 switch (elt)
18970 {
18971 case 0:
18972 use_vec_merge = true;
18973 break;
18974
18975 case 1:
18976 /* tmp = target = A B C D */
18977 tmp = copy_to_reg (target);
18978 /* target = A A B B */
18979 emit_insn (gen_sse_unpcklps (target, target, target));
18980 /* target = X A B B */
18981 ix86_expand_vector_set (false, target, val, 0);
18982 /* target = A X C D */
18983 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18984 GEN_INT (1), GEN_INT (0),
18985 GEN_INT (2+4), GEN_INT (3+4)));
18986 return;
18987
18988 case 2:
18989 /* tmp = target = A B C D */
18990 tmp = copy_to_reg (target);
18991 /* tmp = X B C D */
18992 ix86_expand_vector_set (false, tmp, val, 0);
18993 /* target = A B X D */
18994 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18995 GEN_INT (0), GEN_INT (1),
18996 GEN_INT (0+4), GEN_INT (3+4)));
18997 return;
18998
18999 case 3:
19000 /* tmp = target = A B C D */
19001 tmp = copy_to_reg (target);
19002 /* tmp = X B C D */
19003 ix86_expand_vector_set (false, tmp, val, 0);
19004 /* target = A B C X */
19005 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19006 GEN_INT (0), GEN_INT (1),
19007 GEN_INT (2+4), GEN_INT (0+4)));
19008 return;
19009
19010 default:
19011 gcc_unreachable ();
19012 }
19013 break;
19014
19015 case V4SImode:
19016 /* Element 0 handled by vec_merge below. */
19017 if (elt == 0)
19018 {
19019 use_vec_merge = true;
19020 break;
19021 }
19022
19023 if (TARGET_SSE2)
19024 {
19025 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19026 store into element 0, then shuffle them back. */
19027
19028 rtx order[4];
19029
19030 order[0] = GEN_INT (elt);
19031 order[1] = const1_rtx;
19032 order[2] = const2_rtx;
19033 order[3] = GEN_INT (3);
19034 order[elt] = const0_rtx;
19035
19036 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19037 order[1], order[2], order[3]));
19038
19039 ix86_expand_vector_set (false, target, val, 0);
19040
19041 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19042 order[1], order[2], order[3]));
19043 }
19044 else
19045 {
19046 /* For SSE1, we have to reuse the V4SF code. */
19047 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19048 gen_lowpart (SFmode, val), elt);
19049 }
19050 return;
19051
19052 case V8HImode:
19053 use_vec_merge = TARGET_SSE2;
19054 break;
19055 case V4HImode:
19056 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19057 break;
19058
19059 case V16QImode:
19060 case V8QImode:
19061 default:
19062 break;
19063 }
19064
19065 if (use_vec_merge)
19066 {
19067 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19068 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19069 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19070 }
19071 else
19072 {
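      /* No direct insertion pattern is available for this mode/element
	 combination: spill TARGET to a stack temporary, overwrite the
	 selected element in memory, and reload the whole vector.  */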
19073 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19074
19075 emit_move_insn (mem, target);
19076
19077 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19078 emit_move_insn (tmp, val);
19079
19080 emit_move_insn (target, mem);
19081 }
19082 }
19083
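/* Extract element ELT of vector VEC into the scalar register TARGET.
   Suppress the use of MMX instructions unless MMX_OK is true.  */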
19084 void
19085 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19086 {
19087 enum machine_mode mode = GET_MODE (vec);
19088 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19089 bool use_vec_extr = false;
19090 rtx tmp;
19091
19092 switch (mode)
19093 {
19094 case V2SImode:
19095 case V2SFmode:
19096 if (!mmx_ok)
19097 break;
19098 /* FALLTHRU */
19099
19100 case V2DFmode:
19101 case V2DImode:
19102 use_vec_extr = true;
19103 break;
19104
19105 case V4SFmode:
19106 switch (elt)
19107 {
19108 case 0:
19109 tmp = vec;
19110 break;
19111
19112 case 1:
19113 case 3:
19114 tmp = gen_reg_rtx (mode);
19115 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19116 GEN_INT (elt), GEN_INT (elt),
19117 GEN_INT (elt+4), GEN_INT (elt+4)));
19118 break;
19119
19120 case 2:
19121 tmp = gen_reg_rtx (mode);
19122 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19123 break;
19124
19125 default:
19126 gcc_unreachable ();
19127 }
19128 vec = tmp;
19129 use_vec_extr = true;
19130 elt = 0;
19131 break;
19132
19133 case V4SImode:
19134 if (TARGET_SSE2)
19135 {
19136 switch (elt)
19137 {
19138 case 0:
19139 tmp = vec;
19140 break;
19141
19142 case 1:
19143 case 3:
19144 tmp = gen_reg_rtx (mode);
19145 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19146 GEN_INT (elt), GEN_INT (elt),
19147 GEN_INT (elt), GEN_INT (elt)));
19148 break;
19149
19150 case 2:
19151 tmp = gen_reg_rtx (mode);
19152 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19153 break;
19154
19155 default:
19156 gcc_unreachable ();
19157 }
19158 vec = tmp;
19159 use_vec_extr = true;
19160 elt = 0;
19161 }
19162 else
19163 {
19164 /* For SSE1, we have to reuse the V4SF code. */
19165 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19166 gen_lowpart (V4SFmode, vec), elt);
19167 return;
19168 }
19169 break;
19170
19171 case V8HImode:
19172 use_vec_extr = TARGET_SSE2;
19173 break;
19174 case V4HImode:
19175 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19176 break;
19177
19178 case V16QImode:
19179 case V8QImode:
19180 /* ??? Could extract the appropriate HImode element and shift. */
19181 default:
19182 break;
19183 }
19184
19185 if (use_vec_extr)
19186 {
19187 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19188 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19189
19190 /* Let the rtl optimizers know about the zero extension performed. */
19191 if (inner_mode == HImode)
19192 {
19193 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19194 target = gen_lowpart (SImode, target);
19195 }
19196
19197 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19198 }
19199 else
19200 {
19201 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19202
19203 emit_move_insn (mem, vec);
19204
19205 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19206 emit_move_insn (target, tmp);
19207 }
19208 }
19209
19210 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19211 pattern to reduce; DEST is the destination; IN is the input vector. */
19212
19213 void
19214 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19215 {
19216 rtx tmp1, tmp2, tmp3;
19217
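  /* Element-wise sketch of the sequence emitted below, assuming FN is
     an associative and commutative operation (plus, smin, smax, ...):
	tmp1 = { in[2], in[3], in[2], in[3] }          (movhlps)
	tmp2 = fn (tmp1, in)
	tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] }  (shufps)
	dest = fn (tmp2, tmp3)
     so that element 0 of DEST holds the reduction over all four
     elements of IN.  */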
19218 tmp1 = gen_reg_rtx (V4SFmode);
19219 tmp2 = gen_reg_rtx (V4SFmode);
19220 tmp3 = gen_reg_rtx (V4SFmode);
19221
19222 emit_insn (gen_sse_movhlps (tmp1, in, in));
19223 emit_insn (fn (tmp2, tmp1, in));
19224
19225 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19226 GEN_INT (1), GEN_INT (1),
19227 GEN_INT (1+4), GEN_INT (1+4)));
19228 emit_insn (fn (dest, tmp2, tmp3));
19229 }
19230 \f
19231 /* Target hook for scalar_mode_supported_p. */
19232 static bool
19233 ix86_scalar_mode_supported_p (enum machine_mode mode)
19234 {
19235 if (DECIMAL_FLOAT_MODE_P (mode))
19236 return true;
19237 else
19238 return default_scalar_mode_supported_p (mode);
19239 }
19240
19241 /* Implements target hook vector_mode_supported_p. */
19242 static bool
19243 ix86_vector_mode_supported_p (enum machine_mode mode)
19244 {
19245 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19246 return true;
19247 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19248 return true;
19249 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19250 return true;
19251 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19252 return true;
19253 return false;
19254 }
19255
19256 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19257
19258 We do this in the new i386 backend to maintain source compatibility
19259 with the old cc0-based compiler. */
19260
19261 static tree
19262 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19263 tree inputs ATTRIBUTE_UNUSED,
19264 tree clobbers)
19265 {
19266 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19267 clobbers);
19268 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19269 clobbers);
19270 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19271 clobbers);
19272 return clobbers;
19273 }
19274
19275 /* Return true if this goes in large data/bss. */
19276
19277 static bool
19278 ix86_in_large_data_p (tree exp)
19279 {
19280 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19281 return false;
19282
19283 /* Functions are never large data. */
19284 if (TREE_CODE (exp) == FUNCTION_DECL)
19285 return false;
19286
19287 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19288 {
19289 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19290 if (strcmp (section, ".ldata") == 0
19291 || strcmp (section, ".lbss") == 0)
19292 return true;
19293 return false;
19294 }
19295 else
19296 {
19297 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19298
19299 /* If this is an incomplete type with size 0, then we can't put it
19300 in data because it might be too big when completed. */
19301 if (!size || size > ix86_section_threshold)
19302 return true;
19303 }
19304
19305 return false;
19306 }
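
/* Worker function for TARGET_ENCODE_SECTION_INFO.  In addition to the
   default processing, flag static and external variables that are
   placed in the large data/bss sections, so that later references to
   them can be generated appropriately.  */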
19307 static void
19308 ix86_encode_section_info (tree decl, rtx rtl, int first)
19309 {
19310 default_encode_section_info (decl, rtl, first);
19311
19312 if (TREE_CODE (decl) == VAR_DECL
19313 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19314 && ix86_in_large_data_p (decl))
19315 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19316 }
19317
19318 /* Worker function for REVERSE_CONDITION. */
19319
19320 enum rtx_code
19321 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19322 {
19323 return (mode != CCFPmode && mode != CCFPUmode
19324 ? reverse_condition (code)
19325 : reverse_condition_maybe_unordered (code));
19326 }
19327
19328 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19329 to OPERANDS[0]. */
19330
19331 const char *
19332 output_387_reg_move (rtx insn, rtx *operands)
19333 {
19334 if (REG_P (operands[1])
19335 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19336 {
19337 if (REGNO (operands[0]) == FIRST_STACK_REG)
19338 return output_387_ffreep (operands, 0);
19339 return "fstp\t%y0";
19340 }
19341 if (STACK_TOP_P (operands[0]))
19342 return "fld%z1\t%y1";
19343 return "fst\t%y0";
19344 }
19345
19346 /* Output code to perform a conditional jump to LABEL if the C2 flag
19347 in the FP status register is set. */
19348
19349 void
19350 ix86_emit_fp_unordered_jump (rtx label)
19351 {
19352 rtx reg = gen_reg_rtx (HImode);
19353 rtx temp;
19354
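  /* A sketch of the code emitted below, assuming REG is allocated to
     %ax (the only register fnstsw can target):
	fnstsw	%ax
	sahf
	jp	label
     or, when sahf is not used,
	fnstsw	%ax
	testb	$0x04, %ah
	jne	label  */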
19355 emit_insn (gen_x86_fnstsw_1 (reg));
19356
19357 if (TARGET_USE_SAHF)
19358 {
19359 emit_insn (gen_x86_sahf_1 (reg));
19360
19361 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19362 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19363 }
19364 else
19365 {
19366 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19367
19368 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19369 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19370 }
19371
19372 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19373 gen_rtx_LABEL_REF (VOIDmode, label),
19374 pc_rtx);
19375 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19376 emit_jump_insn (temp);
19377 }
19378
19379 /* Output code to perform a log1p XFmode calculation. */
19380
19381 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19382 {
19383 rtx label1 = gen_label_rtx ();
19384 rtx label2 = gen_label_rtx ();
19385
19386 rtx tmp = gen_reg_rtx (XFmode);
19387 rtx tmp2 = gen_reg_rtx (XFmode);
19388
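  /* A sketch of what is expanded below: log1p (x) == ln (2) * log2 (1 + x).
     The fyl2xp1 instruction computes y * log2 (x + 1), but only for
     |x| < 1 - sqrt (2) / 2 (about 0.2929); outside that range we form
     1 + x explicitly and use fyl2x instead:
	if (fabs (op1) < 0.2928932188134524...)
	  op0 = fyl2xp1 (fldln2, op1);
	else
	  op0 = fyl2x (fldln2, 1.0 + op1);
     where fldln2 is the ln (2) constant from
     standard_80387_constant_rtx (4).  */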
19389 emit_insn (gen_absxf2 (tmp, op1));
19390 emit_insn (gen_cmpxf (tmp,
19391 CONST_DOUBLE_FROM_REAL_VALUE (
19392 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19393 XFmode)));
19394 emit_jump_insn (gen_bge (label1));
19395
19396 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19397 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19398 emit_jump (label2);
19399
19400 emit_label (label1);
19401 emit_move_insn (tmp, CONST1_RTX (XFmode));
19402 emit_insn (gen_addxf3 (tmp, op1, tmp));
19403 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19404 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19405
19406 emit_label (label2);
19407 }
19408
19409 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19410
19411 static void
19412 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19413 tree decl)
19414 {
19415 /* With Binutils 2.15, the "@unwind" marker must be specified on
19416 every occurrence of the ".eh_frame" section, not just the first
19417 one. */
19418 if (TARGET_64BIT
19419 && strcmp (name, ".eh_frame") == 0)
19420 {
19421 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19422 flags & SECTION_WRITE ? "aw" : "a");
19423 return;
19424 }
19425 default_elf_asm_named_section (name, flags, decl);
19426 }
19427
19428 /* Return the mangling of TYPE if it is an extended fundamental type. */
19429
19430 static const char *
19431 ix86_mangle_fundamental_type (tree type)
19432 {
19433 switch (TYPE_MODE (type))
19434 {
19435 case TFmode:
19436 /* __float128 is "g". */
19437 return "g";
19438 case XFmode:
19439 /* "long double" or __float80 is "e". */
19440 return "e";
19441 default:
19442 return NULL;
19443 }
19444 }
19445
19446 /* For 32-bit code we can save PIC register setup by using the
19447 __stack_chk_fail_local hidden function instead of calling
19448 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
19449 register, so it is better to call __stack_chk_fail directly. */
19450
19451 static tree
19452 ix86_stack_protect_fail (void)
19453 {
19454 return TARGET_64BIT
19455 ? default_external_stack_protect_fail ()
19456 : default_hidden_stack_protect_fail ();
19457 }
19458
19459 /* Select a format to encode pointers in exception handling data. CODE
19460 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19461 true if the symbol may be affected by dynamic relocations.
19462
19463 ??? All x86 object file formats are capable of representing this.
19464 After all, the relocation needed is the same as for the call insn.
19465 Whether or not a particular assembler allows us to enter such, I
19466 guess we'll have to see. */
19467 int
19468 asm_preferred_eh_data_format (int code, int global)
19469 {
19470 if (flag_pic)
19471 {
19472 int type = DW_EH_PE_sdata8;
19473 if (!TARGET_64BIT
19474 || ix86_cmodel == CM_SMALL_PIC
19475 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19476 type = DW_EH_PE_sdata4;
19477 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19478 }
19479 if (ix86_cmodel == CM_SMALL
19480 || (ix86_cmodel == CM_MEDIUM && code))
19481 return DW_EH_PE_udata4;
19482 return DW_EH_PE_absptr;
19483 }
19484 \f
19485 /* Expand a copysign operation: copy the sign of SIGN onto the
19486 positive value ABS_VALUE and store the result in RESULT. If MASK
19487 is non-null, it is the mask used to mask out the sign bit. */
19488 static void
19489 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19490 {
19491 enum machine_mode mode = GET_MODE (sign);
19492 rtx sgn = gen_reg_rtx (mode);
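  /* A sketch of the expansion below, with signbit_mask a constant that
     has only the sign bit set:
	result = abs_value | (sign & signbit_mask);
     When MASK is supplied by the caller it is the fabs mask (sign bit
     clear), so its complement is used as signbit_mask.  */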
19493 if (mask == NULL_RTX)
19494 {
19495 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19496 if (!VECTOR_MODE_P (mode))
19497 {
19498 /* We need to generate a scalar mode mask in this case. */
19499 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19500 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19501 mask = gen_reg_rtx (mode);
19502 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19503 }
19504 }
19505 else
19506 mask = gen_rtx_NOT (mode, mask);
19507 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19508 gen_rtx_AND (mode, mask, sign)));
19509 emit_insn (gen_rtx_SET (VOIDmode, result,
19510 gen_rtx_IOR (mode, abs_value, sgn)));
19511 }
19512
19513 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19514 mask for masking out the sign-bit is stored in *SMASK, if that is
19515 non-null. */
19516 static rtx
19517 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19518 {
19519 enum machine_mode mode = GET_MODE (op0);
19520 rtx xa, mask;
19521
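  /* A sketch of the expansion below: MASK is built with every bit set
     except the sign bit, so
	xa = op0 & mask;
     clears the sign bit and yields fabs (op0).  */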
19522 xa = gen_reg_rtx (mode);
19523 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19524 if (!VECTOR_MODE_P (mode))
19525 {
19526 /* We need to generate a scalar mode mask in this case. */
19527 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19528 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19529 mask = gen_reg_rtx (mode);
19530 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19531 }
19532 emit_insn (gen_rtx_SET (VOIDmode, xa,
19533 gen_rtx_AND (mode, op0, mask)));
19534
19535 if (smask)
19536 *smask = mask;
19537
19538 return xa;
19539 }
19540
19541 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19542 swapping the operands if SWAP_OPERANDS is true. The expanded
19543 code is a forward jump to a newly created label in case the
19544 comparison is true. The generated label rtx is returned. */
19545 static rtx
19546 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19547 bool swap_operands)
19548 {
19549 rtx label, tmp;
19550
19551 if (swap_operands)
19552 {
19553 tmp = op0;
19554 op0 = op1;
19555 op1 = tmp;
19556 }
19557
19558 label = gen_label_rtx ();
19559 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19560 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19561 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19562 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19563 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19564 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19565 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19566 JUMP_LABEL (tmp) = label;
19567
19568 return label;
19569 }
19570
19571 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19572 using comparison code CODE. Operands are swapped for the comparison if
19573 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19574 static rtx
19575 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19576 bool swap_operands)
19577 {
19578 enum machine_mode mode = GET_MODE (op0);
19579 rtx mask = gen_reg_rtx (mode);
19580
19581 if (swap_operands)
19582 {
19583 rtx tmp = op0;
19584 op0 = op1;
19585 op1 = tmp;
19586 }
19587
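  /* The scalar cmpsd/cmpss patterns used here produce an all-ones bit
     pattern where the comparison is true and all-zeros where it is
     false; the callers below AND the result with a floating-point
     constant such as 1.0 to turn it into an additive adjustment.  */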
19588 if (mode == DFmode)
19589 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19590 gen_rtx_fmt_ee (code, mode, op0, op1)));
19591 else
19592 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19593 gen_rtx_fmt_ee (code, mode, op0, op1)));
19594
19595 return mask;
19596 }
19597
19598 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19599 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
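/* Adding TWO52 to a nonnegative value smaller than 2**52 (2**23 for
   SFmode) and subtracting it again forces the fractional bits out of
   the mantissa, i.e. rounds the value to an integer in the current
   rounding mode.  The round/floor/ceil/trunc expanders below rely on
   this trick.  */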
19600 static rtx
19601 ix86_gen_TWO52 (enum machine_mode mode)
19602 {
19603 REAL_VALUE_TYPE TWO52r;
19604 rtx TWO52;
19605
19606 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19607 TWO52 = const_double_from_real_value (TWO52r, mode);
19608 TWO52 = force_reg (mode, TWO52);
19609
19610 return TWO52;
19611 }
19612
19613 /* Expand SSE sequence for computing lround from OP1 storing
19614 into OP0. */
19615 void
19616 ix86_expand_lround (rtx op0, rtx op1)
19617 {
19618 /* C code for the stuff we're doing below:
19619 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19620 return (long)tmp;
19621 */
19622 enum machine_mode mode = GET_MODE (op1);
19623 const struct real_format *fmt;
19624 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19625 rtx adj;
19626
19627 /* load nextafter (0.5, 0.0) */
19628 fmt = REAL_MODE_FORMAT (mode);
19629 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19630 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19631
19632 /* adj = copysign (0.5, op1) */
19633 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19634 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19635
19636 /* adj = op1 + adj */
19637 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
19638
19639 /* op0 = (imode)adj */
19640 expand_fix (op0, adj, 0);
19641 }
19642
19643 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
19644 into OP0. */
19645 void
19646 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19647 {
19648 /* C code for the stuff we're doing below (for do_floor):
19649 xi = (long)op1;
19650 xi -= (double)xi > op1 ? 1 : 0;
19651 return xi;
19652 */
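  /* For lceil (!do_floor) the compensation step is instead
	xi += (double)xi < op1 ? 1 : 0;
     with the comparison operands swapped below.  */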
19653 enum machine_mode fmode = GET_MODE (op1);
19654 enum machine_mode imode = GET_MODE (op0);
19655 rtx ireg, freg, label, tmp;
19656
19657 /* ireg = (long)op1 */
19658 ireg = gen_reg_rtx (imode);
19659 expand_fix (ireg, op1, 0);
19660
19661 /* freg = (double)ireg */
19662 freg = gen_reg_rtx (fmode);
19663 expand_float (freg, ireg, 0);
19664
19665 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19666 label = ix86_expand_sse_compare_and_jump (UNLE,
19667 freg, op1, !do_floor);
19668 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19669 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
19670 emit_move_insn (ireg, tmp);
19671
19672 emit_label (label);
19673 LABEL_NUSES (label) = 1;
19674
19675 emit_move_insn (op0, ireg);
19676 }
19677
19678 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19679 result in OPERAND0. */
19680 void
19681 ix86_expand_rint (rtx operand0, rtx operand1)
19682 {
19683 /* C code for the stuff we're doing below:
19684 xa = fabs (operand1);
19685 if (!isless (xa, 2**52))
19686 return operand1;
19687 xa = xa + 2**52 - 2**52;
19688 return copysign (xa, operand1);
19689 */
19690 enum machine_mode mode = GET_MODE (operand0);
19691 rtx res, xa, label, TWO52, mask;
19692
19693 res = gen_reg_rtx (mode);
19694 emit_move_insn (res, operand1);
19695
19696 /* xa = abs (operand1) */
19697 xa = ix86_expand_sse_fabs (res, &mask);
19698
19699 /* if (!isless (xa, TWO52)) goto label; */
19700 TWO52 = ix86_gen_TWO52 (mode);
19701 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19702
19703 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19704 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19705
19706 ix86_sse_copysign_to_positive (res, xa, res, mask);
19707
19708 emit_label (label);
19709 LABEL_NUSES (label) = 1;
19710
19711 emit_move_insn (operand0, res);
19712 }
19713
19714 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19715 into OPERAND0. */
19716 void
19717 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19718 {
19719 /* C code for the stuff we expand below.
19720 double xa = fabs (x), x2;
19721 if (!isless (xa, TWO52))
19722 return x;
19723 xa = xa + TWO52 - TWO52;
19724 x2 = copysign (xa, x);
19725 Compensate. Floor:
19726 if (x2 > x)
19727 x2 -= 1;
19728 Compensate. Ceil:
19729 if (x2 < x)
19730 x2 -= -1;
19731 return x2;
19732 */
19733 enum machine_mode mode = GET_MODE (operand0);
19734 rtx xa, TWO52, tmp, label, one, res, mask;
19735
19736 TWO52 = ix86_gen_TWO52 (mode);
19737
19738 /* Temporary for holding the result, initialized to the input
19739 operand to ease control flow. */
19740 res = gen_reg_rtx (mode);
19741 emit_move_insn (res, operand1);
19742
19743 /* xa = abs (operand1) */
19744 xa = ix86_expand_sse_fabs (res, &mask);
19745
19746 /* if (!isless (xa, TWO52)) goto label; */
19747 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19748
19749 /* xa = xa + TWO52 - TWO52; */
19750 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19751 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19752
19753 /* xa = copysign (xa, operand1) */
19754 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19755
19756 /* generate 1.0 or -1.0 */
19757 one = force_reg (mode,
19758 const_double_from_real_value (do_floor
19759 ? dconst1 : dconstm1, mode));
19760
19761 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19762 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19763 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19764 gen_rtx_AND (mode, one, tmp)));
19765 /* We always need to subtract here to preserve signed zero. */
19766 tmp = expand_simple_binop (mode, MINUS,
19767 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19768 emit_move_insn (res, tmp);
19769
19770 emit_label (label);
19771 LABEL_NUSES (label) = 1;
19772
19773 emit_move_insn (operand0, res);
19774 }
19775
19776 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19777 into OPERAND0. */
19778 void
19779 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19780 {
19781 /* C code for the stuff we expand below.
19782 double xa = fabs (x), x2;
19783 if (!isless (xa, TWO52))
19784 return x;
19785 x2 = (double)(long)x;
19786 Compensate. Floor:
19787 if (x2 > x)
19788 x2 -= 1;
19789 Compensate. Ceil:
19790 if (x2 < x)
19791 x2 += 1;
19792 if (HONOR_SIGNED_ZEROS (mode))
19793 return copysign (x2, x);
19794 return x2;
19795 */
19796 enum machine_mode mode = GET_MODE (operand0);
19797 rtx xa, xi, TWO52, tmp, label, one, res, mask;
19798
19799 TWO52 = ix86_gen_TWO52 (mode);
19800
19801 /* Temporary for holding the result, initialized to the input
19802 operand to ease control flow. */
19803 res = gen_reg_rtx (mode);
19804 emit_move_insn (res, operand1);
19805
19806 /* xa = abs (operand1) */
19807 xa = ix86_expand_sse_fabs (res, &mask);
19808
19809 /* if (!isless (xa, TWO52)) goto label; */
19810 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19811
19812 /* xa = (double)(long)x */
19813 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19814 expand_fix (xi, res, 0);
19815 expand_float (xa, xi, 0);
19816
19817 /* generate 1.0 */
19818 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19819
19820 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19821 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19822 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19823 gen_rtx_AND (mode, one, tmp)));
19824 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19825 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19826 emit_move_insn (res, tmp);
19827
19828 if (HONOR_SIGNED_ZEROS (mode))
19829 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19830
19831 emit_label (label);
19832 LABEL_NUSES (label) = 1;
19833
19834 emit_move_insn (operand0, res);
19835 }
19836
19837 /* Expand SSE sequence for computing round from OPERAND1 storing
19838 into OPERAND0. This sequence works without relying on DImode truncation
19839 via cvttsd2siq, which is only available on 64-bit targets. */
19840 void
19841 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19842 {
19843 /* C code for the stuff we expand below.
19844 double xa = fabs (x), xa2, x2;
19845 if (!isless (xa, TWO52))
19846 return x;
19847 Using the absolute value and copying back sign makes
19848 -0.0 -> -0.0 correct.
19849 xa2 = xa + TWO52 - TWO52;
19850 Compensate.
19851 dxa = xa2 - xa;
19852 if (dxa <= -0.5)
19853 xa2 += 1;
19854 else if (dxa > 0.5)
19855 xa2 -= 1;
19856 x2 = copysign (xa2, x);
19857 return x2;
19858 */
19859 enum machine_mode mode = GET_MODE (operand0);
19860 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19861
19862 TWO52 = ix86_gen_TWO52 (mode);
19863
19864 /* Temporary for holding the result, initialized to the input
19865 operand to ease control flow. */
19866 res = gen_reg_rtx (mode);
19867 emit_move_insn (res, operand1);
19868
19869 /* xa = abs (operand1) */
19870 xa = ix86_expand_sse_fabs (res, &mask);
19871
19872 /* if (!isless (xa, TWO52)) goto label; */
19873 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19874
19875 /* xa2 = xa + TWO52 - TWO52; */
19876 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19877 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19878
19879 /* dxa = xa2 - xa; */
19880 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
19881
19882 /* generate 0.5, 1.0 and -0.5 */
19883 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19884 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
19885 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
19886 0, OPTAB_DIRECT);
19887
19888 /* Compensate. */
19889 tmp = gen_reg_rtx (mode);
19890 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19891 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19892 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19893 gen_rtx_AND (mode, one, tmp)));
19894 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19895 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19896 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19897 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19898 gen_rtx_AND (mode, one, tmp)));
19899 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19900
19901 /* res = copysign (xa2, operand1) */
19902 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19903
19904 emit_label (label);
19905 LABEL_NUSES (label) = 1;
19906
19907 emit_move_insn (operand0, res);
19908 }
19909
19910 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19911 into OPERAND0. */
19912 void
19913 ix86_expand_trunc (rtx operand0, rtx operand1)
19914 {
19915 /* C code for SSE variant we expand below.
19916 double xa = fabs (x), x2;
19917 if (!isless (xa, TWO52))
19918 return x;
19919 x2 = (double)(long)x;
19920 if (HONOR_SIGNED_ZEROS (mode))
19921 return copysign (x2, x);
19922 return x2;
19923 */
19924 enum machine_mode mode = GET_MODE (operand0);
19925 rtx xa, xi, TWO52, label, res, mask;
19926
19927 TWO52 = ix86_gen_TWO52 (mode);
19928
19929 /* Temporary for holding the result, initialized to the input
19930 operand to ease control flow. */
19931 res = gen_reg_rtx (mode);
19932 emit_move_insn (res, operand1);
19933
19934 /* xa = abs (operand1) */
19935 xa = ix86_expand_sse_fabs (res, &mask);
19936
19937 /* if (!isless (xa, TWO52)) goto label; */
19938 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19939
19940 /* x = (double)(long)x */
19941 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19942 expand_fix (xi, res, 0);
19943 expand_float (res, xi, 0);
19944
19945 if (HONOR_SIGNED_ZEROS (mode))
19946 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19947
19948 emit_label (label);
19949 LABEL_NUSES (label) = 1;
19950
19951 emit_move_insn (operand0, res);
19952 }
19953
19954 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19955 into OPERAND0. */
19956 void
19957 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19958 {
19959 enum machine_mode mode = GET_MODE (operand0);
19960 rtx xa, mask, TWO52, label, one, res, smask, tmp;
19961
19962 /* C code for SSE variant we expand below.
19963 double xa = fabs (x), xa2, x2;
19964 if (!isless (xa, TWO52))
19965 return x;
19966 xa2 = xa + TWO52 - TWO52;
19967 Compensate:
19968 if (xa2 > xa)
19969 xa2 -= 1.0;
19970 x2 = copysign (xa2, x);
19971 return x2;
19972 */
19973
19974 TWO52 = ix86_gen_TWO52 (mode);
19975
19976 /* Temporary for holding the result, initialized to the input
19977 operand to ease control flow. */
19978 res = gen_reg_rtx (mode);
19979 emit_move_insn (res, operand1);
19980
19981 /* xa = abs (operand1) */
19982 xa = ix86_expand_sse_fabs (res, &smask);
19983
19984 /* if (!isless (xa, TWO52)) goto label; */
19985 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19986
19987 /* res = xa + TWO52 - TWO52; */
19988 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19989 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
19990 emit_move_insn (res, tmp);
19991
19992 /* generate 1.0 */
19993 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19994
19995 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19996 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19997 emit_insn (gen_rtx_SET (VOIDmode, mask,
19998 gen_rtx_AND (mode, mask, one)));
19999 tmp = expand_simple_binop (mode, MINUS,
20000 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
20001 emit_move_insn (res, tmp);
20002
20003 /* res = copysign (res, operand1) */
20004 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
20005
20006 emit_label (label);
20007 LABEL_NUSES (label) = 1;
20008
20009 emit_move_insn (operand0, res);
20010 }
20011
20012 /* Expand SSE sequence for computing round from OPERAND1 storing
20013 into OPERAND0. */
20014 void
20015 ix86_expand_round (rtx operand0, rtx operand1)
20016 {
20017 /* C code for the stuff we're doing below:
20018 double xa = fabs (x);
20019 if (!isless (xa, TWO52))
20020 return x;
20021 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20022 return copysign (xa, x);
20023 */
20024 enum machine_mode mode = GET_MODE (operand0);
20025 rtx res, TWO52, xa, label, xi, half, mask;
20026 const struct real_format *fmt;
20027 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20028
20029 /* Temporary for holding the result, initialized to the input
20030 operand to ease control flow. */
20031 res = gen_reg_rtx (mode);
20032 emit_move_insn (res, operand1);
20033
20034 TWO52 = ix86_gen_TWO52 (mode);
20035 xa = ix86_expand_sse_fabs (res, &mask);
20036 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20037
20038 /* load nextafter (0.5, 0.0) */
20039 fmt = REAL_MODE_FORMAT (mode);
20040 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20041 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20042
20043 /* xa = xa + 0.5 */
20044 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
20045 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
20046
20047 /* xa = (double)(int64_t)xa */
20048 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20049 expand_fix (xi, xa, 0);
20050 expand_float (xa, xi, 0);
20051
20052 /* res = copysign (xa, operand1) */
20053 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
20054
20055 emit_label (label);
20056 LABEL_NUSES (label) = 1;
20057
20058 emit_move_insn (operand0, res);
20059 }
20060
20061 #include "gt-i386.h"