/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

29 #include "pipe/p_shader_tokens.h"
30 #include "util/u_math.h"
32 #include "freedreno_util.h"

/*
 * We currently require that scheduling ensures that we have enough nops
 * in all the right places.  The legalize step mostly handles fixing up
 * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nops
 * into fewer nops with the rpt flag.
 */

/*
 * State carried through one run of the legalize pass over a single block.
 *
 * NOTE(review): only the `block` member survived in the damaged source
 * text; `has_samp` and `max_bary` are restored from the places this file
 * reads and writes them (types taken from the ir3_block_legalize()
 * out-parameters).  Confirm member order against the canonical file.
 */
struct ir3_legalize_ctx {
	struct ir3_block *block;   /* block whose instructions are legalized */
	bool has_samp;             /* set to true when a tex instruction is emitted */
	int max_bary;              /* running MAX2() of input-instr immediate locations */
};
51 static void legalize(struct ir3_legalize_ctx
*ctx
)
53 struct ir3_block
*block
= ctx
->block
;
54 struct ir3_instruction
*n
;
55 struct ir3
*shader
= block
->shader
;
56 struct ir3_instruction
*end
=
57 ir3_instr_create(block
, 0, OPC_END
);
58 struct ir3_instruction
*last_input
= NULL
;
59 struct ir3_instruction
*last_rel
= NULL
;
60 regmask_t needs_ss_war
; /* write after read */
64 regmask_init(&needs_ss_war
);
65 regmask_init(&needs_ss
);
66 regmask_init(&needs_sy
);
68 shader
->instrs_count
= 0;
70 for (n
= block
->head
; n
; n
= n
->next
) {
71 struct ir3_register
*reg
;
78 struct ir3_register
*inloc
= n
->regs
[1];
79 assert(inloc
->flags
& IR3_REG_IMMED
);
80 ctx
->max_bary
= MAX2(ctx
->max_bary
, inloc
->iim_val
);
83 /* NOTE: consider dst register too.. it could happen that
84 * texture sample instruction (for example) writes some
85 * components which are unused. A subsequent instruction
86 * that writes the same register can race w/ the sam instr
87 * resulting in undefined results:
89 for (i
= 0; i
< n
->regs_count
; i
++) {
94 /* TODO: we probably only need (ss) for alu
95 * instr consuming sfu result.. need to make
96 * some tests for both this and (sy)..
98 if (regmask_get(&needs_ss
, reg
)) {
99 n
->flags
|= IR3_INSTR_SS
;
100 regmask_init(&needs_ss
);
103 if (regmask_get(&needs_sy
, reg
)) {
104 n
->flags
|= IR3_INSTR_SY
;
105 regmask_init(&needs_sy
);
109 /* TODO: is it valid to have address reg loaded from a
110 * relative src (ie. mova a0, c<a0.x+4>)? If so, the
111 * last_rel check below should be moved ahead of this:
113 if (reg
->flags
& IR3_REG_RELATIV
)
117 if (n
->regs_count
> 0) {
119 if (regmask_get(&needs_ss_war
, reg
)) {
120 n
->flags
|= IR3_INSTR_SS
;
121 regmask_init(&needs_ss_war
); // ??? I assume?
124 if (last_rel
&& (reg
->num
== regid(REG_A0
, 0))) {
125 last_rel
->flags
|= IR3_INSTR_UL
;
130 /* cat5+ does not have an (ss) bit, if needed we need to
131 * insert a nop to carry the sync flag. Would be kinda
132 * clever if we were aware of this during scheduling, but
133 * this should be a pretty rare case:
135 if ((n
->flags
& IR3_INSTR_SS
) && (n
->category
>= 5)) {
136 struct ir3_instruction
*nop
;
137 nop
= ir3_instr_create(block
, 0, OPC_NOP
);
138 nop
->flags
|= IR3_INSTR_SS
;
139 n
->flags
&= ~IR3_INSTR_SS
;
142 /* need to be able to set (ss) on first instruction: */
143 if ((shader
->instrs_count
== 0) && (n
->category
>= 5))
144 ir3_instr_create(block
, 0, OPC_NOP
);
146 if (is_nop(n
) && shader
->instrs_count
) {
147 struct ir3_instruction
*last
=
148 shader
->instrs
[shader
->instrs_count
-1];
149 if (is_nop(last
) && (last
->repeat
< 5)) {
151 last
->flags
|= n
->flags
;
156 shader
->instrs
[shader
->instrs_count
++] = n
;
159 regmask_set(&needs_ss
, n
->regs
[0]);
162 /* this ends up being the # of samp instructions.. but that
163 * is ok, everything else only cares whether it is zero or
164 * not. We do this here, rather than when we encounter a
165 * SAMP decl, because (especially in binning pass shader)
166 * the samp instruction(s) could get eliminated if the
167 * result is not used.
169 ctx
->has_samp
= true;
170 regmask_set(&needs_sy
, n
->regs
[0]);
171 } else if (is_mem(n
)) {
172 regmask_set(&needs_sy
, n
->regs
[0]);
175 /* both tex/sfu appear to not always immediately consume
176 * their src register(s):
178 if (is_tex(n
) || is_sfu(n
)) {
179 for (i
= 1; i
< n
->regs_count
; i
++) {
182 regmask_set(&needs_ss_war
, reg
);
191 /* special hack.. if using ldlv to bypass interpolation,
192 * we need to insert a dummy bary.f on which we can set
195 if (is_mem(last_input
) && (last_input
->opc
== OPC_LDLV
)) {
198 /* note that ir3_instr_create() inserts into
199 * shader->instrs[] and increments the count..
200 * so we need to bump up the cnt initially (to
201 * avoid it clobbering the last real instr) and
204 cnt
= ++shader
->instrs_count
;
206 /* inserting instructions would be a bit nicer if list.. */
207 for (i
= cnt
- 2; i
>= 0; i
--) {
208 if (shader
->instrs
[i
] == last_input
) {
210 /* (ss)bary.f (ei)r63.x, 0, r0.x */
211 last_input
= ir3_instr_create(block
, 2, OPC_BARY_F
);
212 last_input
->flags
|= IR3_INSTR_SS
;
213 ir3_reg_create(last_input
, regid(63, 0), 0);
214 ir3_reg_create(last_input
, 0, IR3_REG_IMMED
)->iim_val
= 0;
215 ir3_reg_create(last_input
, regid(0, 0), 0);
217 shader
->instrs
[i
+ 1] = last_input
;
221 shader
->instrs
[i
+ 1] = shader
->instrs
[i
];
224 shader
->instrs_count
= cnt
;
226 last_input
->regs
[0]->flags
|= IR3_REG_EI
;
230 last_rel
->flags
|= IR3_INSTR_UL
;
232 shader
->instrs
[shader
->instrs_count
++] = end
;
234 shader
->instrs
[0]->flags
|= IR3_INSTR_SS
| IR3_INSTR_SY
;
237 void ir3_block_legalize(struct ir3_block
*block
,
238 bool *has_samp
, int *max_bary
)
240 struct ir3_legalize_ctx ctx
= {
247 *has_samp
= ctx
.has_samp
;
248 *max_bary
= ctx
.max_bary
;