/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
34 #include "fd2_program.h"
35 #include "instr-a2xx.h"
45 /* num can mean different things
46 * ssa: index of instruction
47 * reg: index in ctx->reg array
48 * input: index in ctx->input array
49 * const: constant index (C0, C1, etc)
53 enum ir2_src_type type
: 2;
/* State tracked for a single component of a register. */
struct ir2_reg_component {
	/* assigned x/y/z/w; the value 7 means "don't write" (fetch instr) */
	uint8_t c : 3;
	/* set while the component is currently allocated */
	bool alloc : 1;
	/* reference count, consumed by ra */
	uint8_t ref_count;
};
66 uint8_t idx
; /* assigned hardware register */
71 /* block_idx to free on (-1 = free on ref_count==0) */
73 struct ir2_reg_component comp
[4];
88 /* instruction needs to be emitted (for scheduling) */
91 /* predicate value - (usually) same for entire block */
96 struct ir2_src src
[4];
108 instr_fetch_opc_t opc
: 5;
112 uint8_t const_idx_sel
;
122 /* store possible opcs, then we can choose vector/scalar instr */
123 instr_scalar_opc_t scalar_opc
: 6;
124 instr_vector_opc_t vector_opc
: 5;
126 uint8_t write_mask
: 4;
129 /* export idx (-1 no export) */
132 /* for scalarized 2 src instruction */
133 uint8_t src1_swizzle
;
136 /* jmp dst block_idx */
/* Scheduling record: eight 32-bit words of register state (256 bits) and
 * two instruction pointers.
 */
struct ir2_sched_instr {
	uint32_t reg_state[8];
	struct ir2_instr *instr;
	/* NOTE(review): presumably the scalar-slot instruction - confirm */
	struct ir2_instr *instr_s;
};
148 struct fd2_shader_stateobj
*so
;
150 unsigned block_idx
, pred_idx
;
152 bool block_has_jump
[64];
154 unsigned loop_last_block
[64];
159 /* ssa index of position output */
160 struct ir2_src position
;
162 /* to translate SSA ids to instruction ids */
163 int16_t ssa_map
[1024];
165 struct ir2_shader_info
*info
;
166 struct ir2_frag_linkage
*f
;
171 struct ir2_reg
* live_regs
[64];
172 uint32_t reg_state
[256/32]; /* 64*4 bits */
175 struct ir2_reg input
[16 + 1]; /* 16 + param */
178 struct ir2_reg reg
[64];
181 struct ir2_instr instr
[0x300];
182 unsigned instr_count
;
184 struct ir2_sched_instr instr_sched
[0x180];
185 unsigned instr_sched_count
;
/* Entry points whose bodies are not part of this header. All of them take
 * the shared struct ir2_context as their first argument.
 */
void assemble(struct ir2_context *ctx, bool binning);

void ir2_nir_compile(struct ir2_context *ctx, bool binning);

/* ra_* helpers (NOTE(review): presumably register allocation - confirm) */
void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
	bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block);
/* Swizzle constants. Channel i occupies the two bits at position 2*i and
 * holds the selected component relative to the channel, (c - i) & 3, which
 * is the encoding swiz_set()/swiz_get() use. A value of 0 therefore leaves
 * every channel reading its own component.
 */
enum {
	IR2_SWIZZLE_Y = (1 << 0),
	IR2_SWIZZLE_Z = (2 << 0),
	IR2_SWIZZLE_W = (3 << 0),

	IR2_SWIZZLE_ZW = (2 << 0) | (2 << 2),

	IR2_SWIZZLE_XYW = (0 << 0) | (0 << 2) | (1 << 4),

	IR2_SWIZZLE_XXXX = (0 << 0) | (3 << 2) | (2 << 4) | (1 << 6),
	IR2_SWIZZLE_YYYY = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6),
	IR2_SWIZZLE_ZZZZ = (2 << 0) | (1 << 2) | (0 << 4) | (3 << 6),
	IR2_SWIZZLE_WWWW = (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6),
	IR2_SWIZZLE_WYWW = (3 << 0) | (0 << 2) | (1 << 4) | (0 << 6),
	IR2_SWIZZLE_XYXY = (0 << 0) | (0 << 2) | (2 << 4) | (2 << 6),
	IR2_SWIZZLE_ZZXY = (2 << 0) | (1 << 2) | (2 << 4) | (2 << 6),
	IR2_SWIZZLE_YXZZ = (1 << 0) | (3 << 2) | (0 << 4) | (3 << 6),
};
218 #define compile_error(ctx, args...) ({ \
223 static inline struct ir2_src
224 ir2_src(uint16_t num
, uint8_t swizzle
, enum ir2_src_type type
)
226 return (struct ir2_src
) {
/* declared here because ir2_assemble uses it as well */
struct ir2_src ir2_zero(struct ir2_context *ctx);
/* Iterate `it` over the entries of (ctx)->instr[0 .. instr_count) whose type
 * is not IR2_NONE. The end of the range is re-read from (ctx)->instr_count on
 * every step. Relies on GNU statement expressions.
 */
#define ir2_foreach_instr(it, ctx) \
	for (struct ir2_instr *it = (ctx)->instr; ({ \
		for (; it != &(ctx)->instr[(ctx)->instr_count]; it++) { \
			if (it->type != IR2_NONE) \
				break; \
		} \
		it != &(ctx)->instr[(ctx)->instr_count]; }); it++)
/* Iterate `it` over every non-NULL entry of (ctx)->live_regs[0..63].
 *
 * __ptr is the cursor into the array; the condition skips NULL slots and
 * loads the current entry into `it`. The step advances the cursor itself:
 * stepping `it` instead has no lasting effect (the condition overwrites it)
 * and would revisit the same slot forever whenever the loop body leaves that
 * slot set, e.g. on `continue`. Bodies that clear the visited slot behave
 * the same either way. Relies on GNU statement expressions.
 */
#define ir2_foreach_live_reg(it, ctx) \
	for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
		while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
		__ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL; }); __ptr++)
/* Iterate `it` over avail[0 .. avail_count). `avail` (array of
 * struct ir2_instr pointers) and `avail_count` are taken from the scope
 * where the macro is used.
 *
 * The bound is checked before the element is loaded, so the slot one past
 * the last valid entry is never read.
 */
#define ir2_foreach_avail(it) \
	for (struct ir2_instr **__instrp = avail, *it; \
		__instrp != &avail[avail_count] && (it = *__instrp, 1); __instrp++)
/* Iterate `it` over (instr)->src[0 .. src_count). The argument is
 * parenthesized so that expressions such as `&obj` or `*pp` expand with the
 * intended precedence.
 */
#define ir2_foreach_src(it, instr) \
	for (struct ir2_src *it = (instr)->src; \
		it != &(instr)->src[(instr)->src_count]; it++)
/* mask for register allocation
 * 64 registers with 4 components each = 256 bits
 */
/* Return true when bit `num` is set in a bitmask stored as 32-bit words.
 * The caller must supply enough words to cover `num`.
 *
 * The shifted constant is an unsigned 32-bit value: shifting a signed 1 into
 * bit 31 is undefined behaviour.
 */
static inline bool mask_isset(uint32_t *mask, unsigned num)
{
	return (mask[num / 32] & (UINT32_C(1) << (num % 32))) != 0;
}
/* Set bit `num` in a bitmask stored as 32-bit words.
 * The caller must supply enough words to cover `num`.
 *
 * Uses an unsigned constant so that setting bit 31 of a word does not shift
 * into the sign bit of an int (undefined behaviour).
 */
static inline void mask_set(uint32_t *mask, unsigned num)
{
	mask[num / 32] |= UINT32_C(1) << (num % 32);
}
/* Clear bit `num` in a bitmask stored as 32-bit words.
 * The caller must supply enough words to cover `num`.
 *
 * The bit is built from an unsigned constant, so clearing bit 31 does not
 * involve a signed shift into the sign bit (undefined behaviour).
 */
static inline void mask_unset(uint32_t *mask, unsigned num)
{
	mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}
/* Return the four mask bits belonging to register `num`. Each 32-bit word
 * holds eight consecutive 4-bit groups, one per register.
 */
static inline unsigned mask_reg(uint32_t *mask, unsigned num)
{
	const unsigned word = num / 8;
	const unsigned shift = (num % 8) * 4;

	return (mask[word] >> shift) & 0xf;
}
281 static inline bool is_export(struct ir2_instr
*instr
)
283 return instr
->type
== IR2_ALU
&& instr
->alu
.export
>= 0;
286 static inline instr_alloc_type_t
export_buf(unsigned num
)
288 return num
< 32 ? SQ_PARAMETER_PIXEL
:
289 num
>= 62 ? SQ_POSITION
: SQ_MEMORY
;
/* Encode "channel i reads component c": the component is stored relative to
 * the channel, (c - i) & 3, in the two bits at position 2*i.
 */
static inline unsigned swiz_set(unsigned c, unsigned i)
{
	const unsigned rel = (c - i) & 3;

	return rel << (i * 2);
}
/* Decode which component channel i of `swiz` reads: take the relative value
 * stored at bit position 2*i and add the channel index back, modulo 4.
 */
static inline unsigned swiz_get(unsigned swiz, unsigned i)
{
	const unsigned rel = swiz >> (i * 2);

	return (rel + i) & 3;
}
/* Compose two swizzles: channel i of the result reads, through swiz0, the
 * component that swiz1 selects for channel i.
 *
 * Returns the combined swizzle in the encoding used by swiz_set()/swiz_get().
 */
static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1)
{
	unsigned swiz = 0;

	for (int i = 0; i < 4; i++)
		swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);

	return swiz;
}
/* In-place variant of swizzle composition: combines *swiz0 with swiz1, where
 * channel i of the result reads, through *swiz0, the component swiz1 selects
 * for channel i.
 *
 * NOTE(review): the store back through swiz0 is inferred from the void
 * return type and pointer parameter - confirm against the callers.
 */
static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
	unsigned swiz = 0;

	for (int i = 0; i < 4; i++)
		swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i);

	*swiz0 = swiz;
}
320 static inline struct ir2_reg
* get_reg(struct ir2_instr
*instr
)
322 return instr
->is_ssa
? &instr
->ssa
: instr
->reg
;
325 static inline struct ir2_reg
*
326 get_reg_src(struct ir2_context
*ctx
, struct ir2_src
*src
)
330 return &ctx
->input
[src
->num
];
332 return &ctx
->instr
[src
->num
].ssa
;
334 return &ctx
->reg
[src
->num
];
340 /* gets a ncomp value for the dst */
341 static inline unsigned dst_ncomp(struct ir2_instr
*instr
)
344 return instr
->ssa
.ncomp
;
346 if (instr
->type
== IR2_FETCH
)
347 return instr
->reg
->ncomp
;
349 assert(instr
->type
== IR2_ALU
);
352 for (int i
= 0; i
< instr
->reg
->ncomp
; i
++)
353 ncomp
+= !!(instr
->alu
.write_mask
& 1 << i
);
357 /* gets a ncomp value for the src registers */
358 static inline unsigned src_ncomp(struct ir2_instr
*instr
)
360 if (instr
->type
== IR2_FETCH
) {
361 switch (instr
->fetch
.opc
) {
365 return instr
->fetch
.tex
.is_cube
? 3 : 2;
366 case TEX_SET_TEX_LOD
:
373 switch (instr
->alu
.scalar_opc
) {
374 case PRED_SETEs
... KILLONEs
:
380 switch (instr
->alu
.vector_opc
) {
387 case PRED_SETE_PUSHv
:
390 return dst_ncomp(instr
);