2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Jonathan Marek <jonathan@marek.ca>
34 #include "fd2_program.h"
35 #include "instr-a2xx.h"
45 /* num can mean different things
46 * ssa: index of instruction
47 * reg: index in ctx->reg array
48 * input: index in ctx->input array
49 * const: constant index (C0, C1, etc)
53 enum ir2_src_type type
: 2;
/* Allocation state for a single component of a register. */
struct ir2_reg_component {
	uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
	bool alloc : 1;    /* is it currently allocated */
	uint8_t ref_count; /* for ra */
};
66 uint8_t idx
; /* assigned hardware register */
71 /* block_idx to free on (-1 = free on ref_count==0) */
73 struct ir2_reg_component comp
[4];
88 /* instruction needs to be emitted (for scheduling) */
91 /* predicate value - (usually) same for entire block */
96 struct ir2_src src
[4];
108 instr_fetch_opc_t opc
: 5;
112 uint8_t const_idx_sel
;
122 /* store possible opcs, then we can choose vector/scalar instr */
123 instr_scalar_opc_t scalar_opc
: 6;
124 instr_vector_opc_t vector_opc
: 5;
126 uint8_t write_mask
: 4;
129 /* export idx (-1 no export) */
132 /* for scalarized 2 src instruction */
133 uint8_t src1_swizzle
;
136 /* jmp dst block_idx */
/* One entry of the scheduled instruction list. */
struct ir2_sched_instr {
	/* 8 x 32 = 256 bits, the same size as the context's reg_state mask
	 * (64 registers x 4 components).
	 */
	uint32_t reg_state[8];
	/* NOTE(review): presumably the vector and scalar instructions sharing
	 * this slot - confirm against the scheduler.
	 */
	struct ir2_instr *instr, *instr_s;
};
148 struct fd2_shader_stateobj
*so
;
150 unsigned block_idx
, pred_idx
;
152 bool block_has_jump
[64];
154 unsigned loop_last_block
[64];
159 /* ssa index of position output */
160 struct ir2_src position
;
162 /* to translate SSA ids to instruction ids */
163 int16_t ssa_map
[1024];
165 struct ir2_shader_info
*info
;
166 struct ir2_frag_linkage
*f
;
171 struct ir2_reg
* live_regs
[64];
172 uint32_t reg_state
[256/32]; /* 64*4 bits */
175 struct ir2_reg input
[16 + 1]; /* 16 + param */
178 struct ir2_reg reg
[64];
181 struct ir2_instr instr
[0x300];
182 unsigned instr_count
;
184 struct ir2_sched_instr instr_sched
[0x180];
185 unsigned instr_sched_count
;
/* Entry points implemented in other translation units of the compiler. */
void assemble(struct ir2_context *ctx, bool binning);

void ir2_nir_compile(struct ir2_context *ctx, bool binning);

/* ra_*: register allocation helpers */
void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
	bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block);

/* cp_*: NOTE(review): presumably copy-propagation passes - confirm */
void cp_src(struct ir2_context *ctx);
void cp_export(struct ir2_context *ctx);
203 IR2_SWIZZLE_Y
= 1 << 0,
204 IR2_SWIZZLE_Z
= 2 << 0,
205 IR2_SWIZZLE_W
= 3 << 0,
207 IR2_SWIZZLE_ZW
= 2 << 0 | 2 << 2,
209 IR2_SWIZZLE_XYW
= 0 << 0 | 0 << 2 | 1 << 4,
211 IR2_SWIZZLE_XXXX
= 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
212 IR2_SWIZZLE_YYYY
= 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
213 IR2_SWIZZLE_ZZZZ
= 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
214 IR2_SWIZZLE_WWWW
= 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
215 IR2_SWIZZLE_WYWW
= 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
216 IR2_SWIZZLE_XYXY
= 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
217 IR2_SWIZZLE_ZZXY
= 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
218 IR2_SWIZZLE_YXZZ
= 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
221 #define compile_error(ctx, args...) ({ \
226 static inline struct ir2_src
227 ir2_src(uint16_t num
, uint8_t swizzle
, enum ir2_src_type type
)
229 return (struct ir2_src
) {
/* ir2_assemble uses it .. */
struct ir2_src ir2_zero(struct ir2_context *ctx);
/* Iterate over the entries of (ctx)->instr[0 .. instr_count) whose type is
 * not IR2_NONE. `it` is declared by the macro as a struct ir2_instr *.
 * The loop condition is a GNU statement expression that first skips any
 * IR2_NONE slots and then tests for the end of the array.
 */
#define ir2_foreach_instr(it, ctx) \
	for (struct ir2_instr *it = (ctx)->instr; ({ \
		while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \
		it != &(ctx)->instr[(ctx)->instr_count]; }); it++)
/* Iterate over the non-NULL entries of (ctx)->live_regs[0 .. 64).
 * `it` is declared by the macro as a struct ir2_reg * and is reloaded from
 * the slot cursor on every iteration.
 *
 * The increment must advance the slot cursor (__ptr), not `it`: bumping
 * `it` leaves the cursor on the same non-NULL slot, so the condition would
 * reload the same register forever.
 */
#define ir2_foreach_live_reg(it, ctx) \
	for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
		while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
		__ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); __ptr++)
/* Iterate over avail[0 .. avail_count); `avail` and `avail_count` must be in
 * scope at the point of use. `it` is declared by the macro as a
 * struct ir2_instr *.
 *
 * The bounds test runs before the slot is read, so avail[avail_count]
 * (one past the used range) is never dereferenced.
 */
#define ir2_foreach_avail(it) \
	for (struct ir2_instr **__instrp = avail, *it; \
		__instrp != &avail[avail_count] && (it = *__instrp, 1); __instrp++)
/* Iterate over (instr)->src[0 .. src_count). `it` is declared by the macro
 * as a struct ir2_src *. The instr argument is parenthesized so that
 * expressions such as `&x` expand correctly.
 */
#define ir2_foreach_src(it, instr) \
	for (struct ir2_src *it = (instr)->src; \
		it != &(instr)->src[(instr)->src_count]; it++)
257 /* mask for register allocation
258 * 64 registers with 4 components each = 256 bits
/* Test bit `num` of a bitmask stored as an array of 32-bit words.
 * Returns true when the bit is set. The caller must ensure num / 32 is a
 * valid index into mask.
 */
static inline bool mask_isset(const uint32_t *mask, unsigned num)
{
	/* unsigned 32-bit one: a signed `1 << 31` is undefined behavior */
	return (mask[num / 32] & (UINT32_C(1) << (num % 32))) != 0;
}
/* Set bit `num` of a bitmask stored as an array of 32-bit words.
 * The caller must ensure num / 32 is a valid index into mask.
 */
static inline void mask_set(uint32_t *mask, unsigned num)
{
	/* unsigned 32-bit one: a signed `1 << 31` is undefined behavior */
	mask[num / 32] |= UINT32_C(1) << (num % 32);
}
/* Clear bit `num` of a bitmask stored as an array of 32-bit words.
 * The caller must ensure num / 32 is a valid index into mask.
 */
static inline void mask_unset(uint32_t *mask, unsigned num)
{
	/* unsigned 32-bit one: a signed `1 << 31` is undefined behavior */
	mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}
/* Return the 4-bit group of the mask belonging to register `num`.
 * Each 32-bit word holds eight registers (4 bits each), so register num
 * lives in word num / 8 at bit offset (num % 8) * 4.
 */
static inline unsigned mask_reg(const uint32_t *mask, unsigned num)
{
	return (mask[num / 8] >> ((num % 8) * 4)) & 0xf;
}
284 static inline bool is_export(struct ir2_instr
*instr
)
286 return instr
->type
== IR2_ALU
&& instr
->alu
.export
>= 0;
289 static inline instr_alloc_type_t
export_buf(unsigned num
)
291 return num
< 32 ? SQ_PARAMETER_PIXEL
:
292 num
>= 62 ? SQ_POSITION
: SQ_MEMORY
;
/* component c for channel i
 *
 * Swizzles are stored relative to the channel: the 2-bit field for channel
 * i holds (c - i) mod 4, placed at bit offset i * 2.
 */
static inline unsigned swiz_set(unsigned c, unsigned i)
{
	return ((c - i) & 3) << (i * 2);
}
/* get swizzle in channel i
 *
 * Reads the 2-bit field at bit offset i * 2 and adds i (mod 4) to turn the
 * stored relative value into an absolute component index.
 */
static inline unsigned swiz_get(unsigned swiz, unsigned i)
{
	return ((swiz >> (i * 2)) + i) & 3;
}
307 static inline unsigned swiz_merge(unsigned swiz0
, unsigned swiz1
)
310 for (int i
= 0; i
< 4; i
++)
311 swiz
|= swiz_set(swiz_get(swiz0
, swiz_get(swiz1
, i
)), i
);
315 static inline void swiz_merge_p(uint8_t *swiz0
, unsigned swiz1
)
318 for (int i
= 0; i
< 4; i
++)
319 swiz
|= swiz_set(swiz_get(*swiz0
, swiz_get(swiz1
, i
)), i
);
323 static inline struct ir2_reg
* get_reg(struct ir2_instr
*instr
)
325 return instr
->is_ssa
? &instr
->ssa
: instr
->reg
;
328 static inline struct ir2_reg
*
329 get_reg_src(struct ir2_context
*ctx
, struct ir2_src
*src
)
333 return &ctx
->input
[src
->num
];
335 return &ctx
->instr
[src
->num
].ssa
;
337 return &ctx
->reg
[src
->num
];
343 /* gets a ncomp value for the dst */
344 static inline unsigned dst_ncomp(struct ir2_instr
*instr
)
347 return instr
->ssa
.ncomp
;
349 if (instr
->type
== IR2_FETCH
)
350 return instr
->reg
->ncomp
;
352 assert(instr
->type
== IR2_ALU
);
355 for (int i
= 0; i
< instr
->reg
->ncomp
; i
++)
356 ncomp
+= !!(instr
->alu
.write_mask
& 1 << i
);
360 /* gets a ncomp value for the src registers */
361 static inline unsigned src_ncomp(struct ir2_instr
*instr
)
363 if (instr
->type
== IR2_FETCH
) {
364 switch (instr
->fetch
.opc
) {
368 return instr
->fetch
.tex
.is_cube
? 3 : 2;
369 case TEX_SET_TEX_LOD
:
376 switch (instr
->alu
.scalar_opc
) {
377 case PRED_SETEs
... KILLONEs
:
383 switch (instr
->alu
.vector_opc
) {
390 case PRED_SETE_PUSHv
:
393 return dst_ncomp(instr
);