[ARM/AArch64][2/2] Crypto intrinsics tuning for Cortex-A53 - pipeline description
[gcc.git] / gcc / config / arm / aarch-common.c
1 /* Dependency checks for instruction scheduling, shared between ARM and
2 AARCH64.
3
4 Copyright (C) 1991-2014 Free Software Foundation, Inc.
5 Contributed by ARM Ltd.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "tm_p.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "c-family/c-common.h"
32 #include "rtl.h"
33
34 /* In ARMv8-A there's a general expectation that AESE/AESMC
35 and AESD/AESIMC sequences of the form:
36
37 AESE Vn, _
38 AESMC Vn, Vn
39
40 will issue both instructions in a single cycle on super-scalar
41 implementations. This function identifies such pairs. */
42
43 int
44 aarch_crypto_can_dual_issue (rtx producer, rtx consumer)
45 {
46 rtx producer_src, consumer_src;
47
48 producer = single_set (producer);
49 consumer = single_set (consumer);
50
51 producer_src = producer ? SET_SRC (producer) : NULL;
52 consumer_src = consumer ? SET_SRC (consumer) : NULL;
53
54 if (producer_src && consumer_src
55 && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
56 && ((XINT (producer_src, 1) == UNSPEC_AESE
57 && XINT (consumer_src, 1) == UNSPEC_AESMC)
58 || (XINT (producer_src, 1) == UNSPEC_AESD
59 && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
60 {
61 unsigned int regno = REGNO (SET_DEST (producer));
62
63 return REGNO (SET_DEST (consumer)) == regno
64 && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
65 }
66
67 return 0;
68 }
69
70 typedef struct
71 {
72 rtx_code search_code;
73 rtx search_result;
74 bool find_any_shift;
75 } search_term;
76
77 /* Return TRUE if X is either an arithmetic shift left, or
78 is a multiplication by a power of two. */
79 bool
80 arm_rtx_shift_left_p (rtx x)
81 {
82 enum rtx_code code = GET_CODE (x);
83
84 if (code == MULT && CONST_INT_P (XEXP (x, 1))
85 && exact_log2 (INTVAL (XEXP (x, 1))) > 0)
86 return true;
87
88 if (code == ASHIFT)
89 return true;
90
91 return false;
92 }
93
94 static rtx_code shift_rtx_codes[] =
95 { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
96 ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
97
98 /* Callback function for arm_find_sub_rtx_with_code.
99 DATA is safe to treat as a SEARCH_TERM, ST. This will
100 hold a SEARCH_CODE. PATTERN is checked to see if it is an
101 RTX with that code. If it is, write SEARCH_RESULT in ST
102 and return 1. Otherwise, or if we have been passed a NULL_RTX
103 return 0. If ST.FIND_ANY_SHIFT then we are interested in
104 anything which can reasonably be described as a SHIFT RTX. */
105 static int
106 arm_find_sub_rtx_with_search_term (rtx *pattern, void *data)
107 {
108 search_term *st = (search_term *) data;
109 rtx_code pattern_code;
110 int found = 0;
111
112 gcc_assert (pattern);
113 gcc_assert (st);
114
115 /* Poorly formed patterns can really ruin our day. */
116 if (*pattern == NULL_RTX)
117 return 0;
118
119 pattern_code = GET_CODE (*pattern);
120
121 if (st->find_any_shift)
122 {
123 unsigned i = 0;
124
125 /* Left shifts might have been canonicalized to a MULT of some
126 power of two. Make sure we catch them. */
127 if (arm_rtx_shift_left_p (*pattern))
128 found = 1;
129 else
130 for (i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
131 if (pattern_code == shift_rtx_codes[i])
132 found = 1;
133 }
134
135 if (pattern_code == st->search_code)
136 found = 1;
137
138 if (found)
139 st->search_result = *pattern;
140
141 return found;
142 }
143
144 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE. */
145 static rtx
146 arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
147 {
148 search_term st;
149 int result = 0;
150
151 gcc_assert (pattern != NULL_RTX);
152 st.search_code = code;
153 st.search_result = NULL_RTX;
154 st.find_any_shift = find_any_shift;
155 result = for_each_rtx (&pattern, arm_find_sub_rtx_with_search_term, &st);
156 if (result)
157 return st.search_result;
158 else
159 return NULL_RTX;
160 }
161
162 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
163 static rtx
164 arm_find_shift_sub_rtx (rtx pattern)
165 {
166 return arm_find_sub_rtx_with_code (pattern, ASHIFT, true);
167 }
168
169 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
170 (possibly) contains a SET which will provide a result we can access
171 using the SET_DEST macro. We will place the RTX which would be
172 written by PRODUCER in SET_SOURCE.
173 Similarly, CONSUMER (possibly) contains a SET which has an operand
174 we can access using SET_SRC. We place this operand in
175 SET_DESTINATION.
176
177 Return nonzero if we found the SET RTX we expected. */
178 static int
179 arm_get_set_operands (rtx producer, rtx consumer,
180 rtx *set_source, rtx *set_destination)
181 {
182 rtx set_producer = arm_find_sub_rtx_with_code (producer, SET, false);
183 rtx set_consumer = arm_find_sub_rtx_with_code (consumer, SET, false);
184
185 if (set_producer && set_consumer)
186 {
187 *set_source = SET_DEST (set_producer);
188 *set_destination = SET_SRC (set_consumer);
189 return 1;
190 }
191 return 0;
192 }
193
194 /* Return nonzero if the CONSUMER instruction (a load) does need
195 PRODUCER's value to calculate the address. */
196 int
197 arm_early_load_addr_dep (rtx producer, rtx consumer)
198 {
199 rtx value, addr;
200
201 if (!arm_get_set_operands (producer, consumer, &value, &addr))
202 return 0;
203
204 return reg_overlap_mentioned_p (value, addr);
205 }
206
207 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
208 have an early register shift value or amount dependency on the
209 result of PRODUCER. */
210 int
211 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
212 {
213 rtx value, op;
214 rtx early_op;
215
216 if (!arm_get_set_operands (producer, consumer, &value, &op))
217 return 0;
218
219 if ((early_op = arm_find_shift_sub_rtx (op)))
220 {
221 if (REG_P (early_op))
222 early_op = op;
223
224 return !reg_overlap_mentioned_p (value, early_op);
225 }
226
227 return 0;
228 }
229
230 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
231 have an early register shift value dependency on the result of
232 PRODUCER. */
233 int
234 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
235 {
236 rtx value, op;
237 rtx early_op;
238
239 if (!arm_get_set_operands (producer, consumer, &value, &op))
240 return 0;
241
242 if ((early_op = arm_find_shift_sub_rtx (op)))
243 /* We want to check the value being shifted. */
244 if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
245 return 1;
246
247 return 0;
248 }
249
250 /* Return nonzero if the CONSUMER (a mul or mac op) does not
251 have an early register mult dependency on the result of
252 PRODUCER. */
253 int
254 arm_no_early_mul_dep (rtx producer, rtx consumer)
255 {
256 rtx value, op;
257
258 if (!arm_get_set_operands (producer, consumer, &value, &op))
259 return 0;
260
261 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
262 {
263 if (GET_CODE (XEXP (op, 0)) == MULT)
264 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
265 else
266 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
267 }
268
269 return 0;
270 }
271
272 /* Return nonzero if the CONSUMER instruction (a store) does not need
273 PRODUCER's value to calculate the address. */
274
275 int
276 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
277 {
278 rtx value = arm_find_sub_rtx_with_code (producer, SET, false);
279 rtx addr = arm_find_sub_rtx_with_code (consumer, SET, false);
280
281 if (value)
282 value = SET_DEST (value);
283
284 if (addr)
285 addr = SET_DEST (addr);
286
287 if (!value || !addr)
288 return 0;
289
290 return !reg_overlap_mentioned_p (value, addr);
291 }
292
293 /* Return nonzero if the CONSUMER instruction (a store) does need
294 PRODUCER's value to calculate the address. */
295
296 int
297 arm_early_store_addr_dep (rtx producer, rtx consumer)
298 {
299 return !arm_no_early_store_addr_dep (producer, consumer);
300 }
301
302 /* Return non-zero iff the consumer (a multiply-accumulate or a
303 multiple-subtract instruction) has an accumulator dependency on the
304 result of the producer and no other dependency on that result. It
305 does not check if the producer is multiply-accumulate instruction. */
306 int
307 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
308 {
309 rtx result;
310 rtx op0, op1, acc;
311
312 producer = PATTERN (producer);
313 consumer = PATTERN (consumer);
314
315 if (GET_CODE (producer) == COND_EXEC)
316 producer = COND_EXEC_CODE (producer);
317 if (GET_CODE (consumer) == COND_EXEC)
318 consumer = COND_EXEC_CODE (consumer);
319
320 if (GET_CODE (producer) != SET)
321 return 0;
322
323 result = XEXP (producer, 0);
324
325 if (GET_CODE (consumer) != SET)
326 return 0;
327
328 /* Check that the consumer is of the form
329 (set (...) (plus (mult ...) (...)))
330 or
331 (set (...) (minus (...) (mult ...))). */
332 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
333 {
334 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
335 return 0;
336
337 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
338 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
339 acc = XEXP (XEXP (consumer, 1), 1);
340 }
341 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
342 {
343 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
344 return 0;
345
346 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
347 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
348 acc = XEXP (XEXP (consumer, 1), 0);
349 }
350 else
351 return 0;
352
353 return (reg_overlap_mentioned_p (result, acc)
354 && !reg_overlap_mentioned_p (result, op0)
355 && !reg_overlap_mentioned_p (result, op1));
356 }
357
358 /* Return non-zero if the consumer (a multiply-accumulate instruction)
359 has an accumulator dependency on the result of the producer (a
360 multiplication instruction) and no other dependency on that result. */
361 int
362 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
363 {
364 rtx mul = PATTERN (producer);
365 rtx mac = PATTERN (consumer);
366 rtx mul_result;
367 rtx mac_op0, mac_op1, mac_acc;
368
369 if (GET_CODE (mul) == COND_EXEC)
370 mul = COND_EXEC_CODE (mul);
371 if (GET_CODE (mac) == COND_EXEC)
372 mac = COND_EXEC_CODE (mac);
373
374 /* Check that mul is of the form (set (...) (mult ...))
375 and mla is of the form (set (...) (plus (mult ...) (...))). */
376 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
377 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
378 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
379 return 0;
380
381 mul_result = XEXP (mul, 0);
382 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
383 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
384 mac_acc = XEXP (XEXP (mac, 1), 1);
385
386 return (reg_overlap_mentioned_p (mul_result, mac_acc)
387 && !reg_overlap_mentioned_p (mul_result, mac_op0)
388 && !reg_overlap_mentioned_p (mul_result, mac_op1));
389 }