/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 *    Jonathan Marek <jonathan@marek.ca>
 */
28 #include "pipe/p_state.h"
29 #include "util/u_string.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "util/u_format.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_parse.h"
36 #include "freedreno_program.h"
39 #include "fd2_program.h"
40 #include "fd2_texture.h"
42 #include "instr-a2xx.h"
/*
 * create_shader: allocate a fd2_shader_stateobj with CALLOC_STRUCT and record
 * on it whether the screen behind `pctx` is an a20x part.
 *
 * NOTE(review): this listing is incomplete -- lines are missing between the
 * allocation and the is_a20x assignment and after it, so the handling of a
 * failed allocation, any use of `type`, and the return statement are not
 * visible here. Confirm against the complete file.
 */
44 static struct fd2_shader_stateobj
*
45 create_shader(struct pipe_context
*pctx
, gl_shader_stage type
)
47 struct fd2_shader_stateobj
*so
= CALLOC_STRUCT(fd2_shader_stateobj
);
/* store the result of is_a20x() for this context's screen on the object */
51 so
->is_a20x
= is_a20x(fd_context(pctx
)->screen
);
/*
 * delete_shader: walk every entry of so->variant and free() its
 * info.dwords buffer.
 *
 * NOTE(review): lines before and after the loop are missing from this
 * listing, so a NULL check on `so` and the release of `so` itself are not
 * visible here -- confirm against the complete file.
 */
56 delete_shader(struct fd2_shader_stateobj
*so
)
/* free(NULL) is a no-op, so variants without a dwords buffer are harmless */
61 for (int i
= 0; i
< ARRAY_SIZE(so
->variant
); i
++)
62 free(so
->variant
[i
].info
.dwords
);
/*
 * emit: write one compiled shader into `ring` as a CP_IM_LOAD_IMMEDIATE
 * packet.
 *
 * ring    - ring buffer receiving the packet
 * type    - shader stage; only compared against MESA_SHADER_FRAGMENT
 * info    - compiled shader: `dwords` holds `sizedwords` instruction dwords
 * patches - dynarray that receives a pointer into the ring (see below)
 *
 * Packet payload, in order: (type == MESA_SHADER_FRAGMENT), sizedwords, then
 * the sizedwords instruction dwords -- hence the 2 + sizedwords packet size.
 */
67 emit(struct fd_ringbuffer
*ring
, gl_shader_stage type
,
68 struct ir2_shader_info
*info
, struct util_dynarray
*patches
)
/* an empty shader is treated as a programming error */
72 assert(info
->sizedwords
);
74 OUT_PKT3(ring
, CP_IM_LOAD_IMMEDIATE
, 2 + info
->sizedwords
);
/* first payload dword: 1 for a fragment shader, 0 for any other stage */
75 OUT_RING(ring
, type
== MESA_SHADER_FRAGMENT
);
76 OUT_RING(ring
, info
->sizedwords
);
/*
 * Record the ring address at index mem_export_ptr counted from the current
 * write position, i.e. before the shader dwords below are written.
 * NOTE(review): fd2_program_emit passes NULL for `patches` in some calls and
 * a line is missing from this listing just above this statement; presumably
 * it is an `if (patches)` guard -- confirm.
 */
79 util_dynarray_append(patches
, uint32_t*, &ring
->cur
[info
->mem_export_ptr
]);
/* copy the shader instructions into the packet */
81 for (i
= 0; i
< info
->sizedwords
; i
++)
82 OUT_RING(ring
, info
->dwords
[i
]);
/*
 * ir2_glsl_type_size: return glsl_count_attribute_slots(type, false).
 * Passed as the type-size callback to nir_lower_io by the shader state
 * create functions in this file.
 */
86 ir2_glsl_type_size(const struct glsl_type
*type
)
88 return glsl_count_attribute_slots(type
, false);
/*
 * fd2_fp_state_create: build the fragment shader state object for `cso`.
 *
 * Visible steps: allocate the object for MESA_SHADER_FRAGMENT, obtain NIR
 * (taken directly from the cso, or converted from TGSI), run
 * ir2_optimize_nir, set first_immediate to the NIR uniform count, compile
 * variant 0 with no linked fragment shader argument, then ralloc_free the NIR.
 *
 * NOTE(review): several lines are missing from this listing -- the check of
 * the create_shader() result, the line separating the NIR branch from the
 * TGSI branch, the action taken when ir2_optimize_nir() returns non-zero,
 * and the return statement. Confirm against the complete file.
 */
92 fd2_fp_state_create(struct pipe_context
*pctx
,
93 const struct pipe_shader_state
*cso
)
95 struct fd2_shader_stateobj
*so
= create_shader(pctx
, MESA_SHADER_FRAGMENT
);
/* NIR input: use the shader handed in by the state tracker and lower its I/O */
99 if (cso
->type
== PIPE_SHADER_IR_NIR
) {
100 so
->nir
= cso
->ir
.nir
;
101 NIR_PASS_V(so
->nir
, nir_lower_io
, nir_var_all
, ir2_glsl_type_size
,
102 (nir_lower_io_options
)0);
/* the only other accepted input is TGSI, which is translated to NIR */
104 assert(cso
->type
== PIPE_SHADER_IR_TGSI
);
105 so
->nir
= ir2_tgsi_to_nir(cso
->tokens
, pctx
->screen
);
/* a non-zero result selects a path whose body is not visible in this listing */
108 if (ir2_optimize_nir(so
->nir
, true))
/* first_immediate is set to the number of uniforms declared by the shader */
111 so
->first_immediate
= so
->nir
->num_uniforms
;
/* compile into variant slot 0 */
113 ir2_compile(so
, 0, NULL
);
/* the NIR is released once variant 0 has been compiled */
115 ralloc_free(so
->nir
);
/*
 * fd2_fp_state_delete: delete hook for fragment shader state; `hwcso` is
 * the opaque handle, converted back to a fd2_shader_stateobj pointer.
 *
 * NOTE(review): what is done with `so` afterwards is missing from this
 * listing -- presumably delete_shader(so); confirm.
 */
125 fd2_fp_state_delete(struct pipe_context
*pctx
, void *hwcso
)
127 struct fd2_shader_stateobj
*so
= hwcso
;
/*
 * fd2_vp_state_create: build the vertex shader state object for `cso`.
 *
 * Visible steps mirror fd2_fp_state_create: allocate the object for
 * MESA_SHADER_VERTEX, obtain NIR (directly, or converted from TGSI), run
 * ir2_optimize_nir, set first_immediate to the NIR uniform count and compile
 * variant 0.
 *
 * NOTE(review): no ralloc_free of so->nir is visible in this function;
 * fd2_program_emit and fd2_prog_init call ir2_compile() on vertex shaders
 * again later, so the NIR is presumably kept for that. The allocation check,
 * the line between the two branches, the failure path and the return
 * statement are missing from this listing -- confirm against the complete
 * file.
 */
132 fd2_vp_state_create(struct pipe_context
*pctx
,
133 const struct pipe_shader_state
*cso
)
135 struct fd2_shader_stateobj
*so
= create_shader(pctx
, MESA_SHADER_VERTEX
);
/* NIR input: use the shader handed in by the state tracker and lower its I/O */
139 if (cso
->type
== PIPE_SHADER_IR_NIR
) {
140 so
->nir
= cso
->ir
.nir
;
141 NIR_PASS_V(so
->nir
, nir_lower_io
, nir_var_all
, ir2_glsl_type_size
,
142 (nir_lower_io_options
)0);
/* the only other accepted input is TGSI, which is translated to NIR */
144 assert(cso
->type
== PIPE_SHADER_IR_TGSI
);
145 so
->nir
= ir2_tgsi_to_nir(cso
->tokens
, pctx
->screen
);
/* a non-zero result selects a path whose body is not visible in this listing */
148 if (ir2_optimize_nir(so
->nir
, true))
/* first_immediate is set to the number of uniforms declared by the shader */
151 so
->first_immediate
= so
->nir
->num_uniforms
;
/* variant slot 0 is the one fd2_program_emit selects when emitting to the binning ring */
153 /* compile binning variant now */
154 ir2_compile(so
, 0, NULL
);
/*
 * fd2_vp_state_delete: delete hook for vertex shader state; `hwcso` is the
 * opaque handle, converted back to a fd2_shader_stateobj pointer.
 *
 * NOTE(review): what is done with `so` afterwards is missing from this
 * listing -- presumably delete_shader(so); confirm.
 */
164 fd2_vp_state_delete(struct pipe_context
*pctx
, void *hwcso
)
166 struct fd2_shader_stateobj
*so
= hwcso
;
/*
 * patch_vtx_fetch: fill in a vertex fetch instruction from the currently
 * bound vertex state.
 *
 * ctx      - context; its vertex buffer table is indexed by the element
 * elem     - vertex element supplying source format, offset and buffer index
 * instr    - fetch instruction patched in place
 * dst_swiz - four 3-bit destination selectors, component 0 in the low bits
 *
 * Sets format, num_format_all, format_comp_all, stride, offset and dst_swiz.
 */
171 patch_vtx_fetch(struct fd_context
*ctx
, struct pipe_vertex_element
*elem
,
172 instr_fetch_vtx_t
*instr
, uint16_t dst_swiz
)
174 struct pipe_vertex_buffer
*vb
=
175 &ctx
->vtx
.vertexbuf
.vb
[elem
->vertex_buffer_index
];
176 enum pipe_format format
= elem
->src_format
;
177 const struct util_format_description
*desc
=
178 util_format_description(format
);
/*
 * NOTE(review): the statement executed when the test below matches is
 * missing from this listing (presumably `break`). If no channel matched, j
 * would end at 4 and the channel[j] reads below would go past the four
 * entries scanned here -- confirm this cannot happen for vertex formats.
 */
181 /* Find the first non-VOID channel. */
182 for (j
= 0; j
< 4; j
++)
183 if (desc
->channel
[j
].type
!= UTIL_FORMAT_TYPE_VOID
)
/* numeric properties are taken from channel j and applied to the whole fetch */
186 instr
->format
= fd2_pipe2surface(format
);
187 instr
->num_format_all
= !desc
->channel
[j
].normalized
;
188 instr
->format_comp_all
= desc
->channel
[j
].type
== UTIL_FORMAT_TYPE_SIGNED
;
189 instr
->stride
= vb
->stride
;
190 instr
->offset
= elem
->src_offset
;
/*
 * Rebuild the destination swizzle: each 3-bit selector s is
 * (dst_swiz >> (i*3)) & 7. Selectors 0-3 are remapped through the format's
 * own swizzle table; selectors >= 4 are kept unchanged.
 * NOTE(review): the declaration/initialisation of `swiz` is missing from
 * this listing; it is OR-accumulated, so it presumably starts at 0.
 */
193 for (int i
= 0; i
< 4; i
++) {
194 unsigned s
= dst_swiz
>> i
*3 & 7;
195 swiz
|= (s
>= 4 ? s
: desc
->swizzle
[s
]) << i
*3;
197 instr
->dst_swiz
= swiz
;
/*
 * patch_fetches: patch every fetch instruction of a compiled shader in
 * place, using the current vertex and texture state.
 *
 * ctx  - context, forwarded to the helpers
 * info - compiled shader; fetch_info[i].offset locates each fetch
 *        instruction inside info->dwords
 * vtx  - vertex state object used for VTX_FETCH instructions
 *        (fd2_program_emit passes NULL here for fragment shaders)
 * tex  - texture state used for TEX_FETCH instructions
 */
201 patch_fetches(struct fd_context
*ctx
, struct ir2_shader_info
*info
,
202 struct fd_vertex_stateobj
*vtx
, struct fd_texture_stateobj
*tex
)
204 for (int i
= 0; i
< info
->num_fetch_instrs
; i
++) {
205 struct ir2_fetch_info
*fi
= &info
->fetch_info
[i
];
/* view the dwords at the recorded offset as a fetch instruction */
207 instr_fetch_t
*instr
= (instr_fetch_t
*) &info
->dwords
[fi
->offset
];
208 if (instr
->opc
== VTX_FETCH
) {
/*
 * Vertex element index derived from the fetch constant: slots start at
 * constant 20 and const_index_sel picks one of three entries per constant.
 * NOTE(review): presumably mirrors the layout chosen by the compiler --
 * confirm.
 */
209 unsigned idx
= (instr
->vtx
.const_index
- 20) * 3 +
210 instr
->vtx
.const_index_sel
;
211 patch_vtx_fetch(ctx
, &vtx
->pipe
[idx
], &instr
->vtx
, fi
->vtx
.dst_swiz
);
/*
 * NOTE(review): the lines closing the vertex branch are missing from this
 * listing; since the assert below requires TEX_FETCH, the branch presumably
 * ends with `continue` -- confirm.
 */
215 assert(instr
->opc
== TEX_FETCH
);
216 instr
->tex
.const_idx
= fd2_get_const_idx(ctx
, tex
, fi
->tex
.samp_id
);
217 instr
->tex
.src_swiz
= fi
->tex
.src_swiz
;
/*
 * When the sampler asks for it, exchange the two low 2-bit selectors
 * (bits 0-1 and bits 2-3) and keep bits 4-5 unchanged.
 */
218 if (fd2_texture_swap_xy(tex
, fi
->tex
.samp_id
)) {
219 unsigned x
= instr
->tex
.src_swiz
;
220 instr
->tex
.src_swiz
= (x
& 0x30) | (x
& 3) << 2 | (x
>> 2 & 3);
/*
 * fd2_program_emit: emit the shaders of `prog` into `ring` and program the
 * SQ_CONTEXT_MISC and SQ_PROGRAM_CNTL registers to match.
 *
 * ctx  - context; supplies the batch, vertex/texture state and the built-in
 *        solid/blit programs
 * ring - destination ring buffer
 * prog - program state whose shaders are emitted
 *
 * NOTE(review): this listing is incomplete. The assignments of `vp` and `fp`
 * and several lines that presumably guard fragment-shader work are missing;
 * the hedged notes below mark where that matters.
 */
226 fd2_program_emit(struct fd_context
*ctx
, struct fd_ringbuffer
*ring
,
227 struct fd_program_stateobj
*prog
)
229 struct fd2_shader_stateobj
*fp
= NULL
, *vp
;
230 struct ir2_shader_info
*fpi
, *vpi
;
231 struct ir2_frag_linkage
*f
;
232 uint8_t vs_gprs
, fs_gprs
= 0, vs_export
= 0;
233 enum a2xx_sq_ps_vtx_mode mode
= POSITION_1_VECTOR
;
/* true when emitting into the current batch's binning ring */
234 bool binning
= (ctx
->batch
&& ring
== ctx
->batch
->binning
);
/* variant 0 is the default; the search below starts at slot 1 */
235 unsigned variant
= 0;
239 /* find variant matching the linked fragment shader */
242 for (variant
= 1; variant
< ARRAY_SIZE(vp
->variant
); variant
++) {
/* a slot with sizedwords == 0 has not been compiled yet: compile it for fp */
243 /* if checked all variants, compile a new variant */
244 if (!vp
->variant
[variant
].info
.sizedwords
) {
245 ir2_compile(vp
, variant
, fp
);
/* an existing slot is reusable when its stored linkage equals fp's, byte for byte */
249 /* check if fragment shader linkage matches */
250 if (!memcmp(&vp
->variant
[variant
].f
, &fp
->variant
[0].f
,
251 sizeof(struct ir2_frag_linkage
)))
/* the search must have stopped on a valid slot */
254 assert(variant
< ARRAY_SIZE(vp
->variant
));
/*
 * NOTE(review): `fp` is initialised to NULL and the COND(fp, ...) /
 * COND(!fp, ...) uses further down imply it can still be NULL here. If so,
 * the two assignments from fp below form addresses from a null pointer
 * (no load happens at this point) -- confirm this is intended.
 */
257 vpi
= &vp
->variant
[variant
].info
;
258 fpi
= &fp
->variant
[0].info
;
259 f
= &fp
->variant
[0].f
;
261 /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
262 if (prog
!= &ctx
->solid_prog
&& prog
!= &ctx
->blit_prog
[0]) {
263 patch_fetches(ctx
, vpi
, ctx
->vtx
.vtx
, &ctx
->tex
[PIPE_SHADER_VERTEX
]);
/* NOTE(review): a line is missing just above the next call -- presumably `if (fp)`; confirm */
265 patch_fetches(ctx
, fpi
, NULL
, &ctx
->tex
[PIPE_SHADER_FRAGMENT
]);
/* the vertex shader records a patch pointer only when emitting to the binning ring */
268 emit(ring
, MESA_SHADER_VERTEX
, vpi
,
269 binning
? &ctx
->batch
->shader_patches
: NULL
);
/*
 * Fragment shader emission and the register/export counts derived from it.
 * NOTE(review): presumably guarded by a missing `if (fp)` line -- confirm.
 * max_reg < 0 is encoded as 0x80; vs_export is inputs_count - 1, with zero
 * inputs treated as one.
 */
272 emit(ring
, MESA_SHADER_FRAGMENT
, fpi
, NULL
);
273 fs_gprs
= (fpi
->max_reg
< 0) ? 0x80 : fpi
->max_reg
;
274 vs_export
= MAX2(1, f
->inputs_count
) - 1;
277 vs_gprs
= (vpi
->max_reg
< 0) ? 0x80 : vpi
->max_reg
;
/* point-size output selects the sprite export mode, except on the binning ring */
279 if (vp
->writes_psize
&& !binning
)
280 mode
= POSITION_2_VECTORS_SPRITE
;
282 /* set register to use for param (fragcoord/pointcoord/frontfacing) */
283 OUT_PKT3(ring
, CP_SET_CONSTANT
, 2);
284 OUT_RING(ring
, CP_REG(REG_A2XX_SQ_CONTEXT_MISC
));
285 OUT_RING(ring
, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY
) |
286 COND(fp
, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f
->inputs_count
)) |
287 /* we need SCREEN_XY for both fragcoord and frontfacing */
288 A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY
);
/* program control: export modes, export count and register counts from above */
290 OUT_PKT3(ring
, CP_SET_CONSTANT
, 2);
291 OUT_RING(ring
, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL
));
292 OUT_RING(ring
, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
293 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode
) |
294 A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE
|
295 A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE
|
296 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export
) |
297 A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs
) |
298 A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs
) |
299 COND(fp
&& fp
->need_param
, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN
) |
300 COND(!fp
, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX
));
/*
 * fd2_prog_init: install the a2xx shader state hooks on `pctx`, then compile
 * variant 1 of the vertex shaders of the built-in solid and blit programs and
 * hand-patch their vertex fetch instructions. fd2_program_emit skips
 * patch_fetches() for exactly these two programs.
 *
 * NOTE(review): this listing is incomplete. `so` is declared but no
 * assignment to it is visible before `&so->variant[1].info` is taken;
 * presumably a missing line sets it to prog->vp before each ir2_compile()
 * call -- confirm. One line is also missing between each format_comp_all and
 * num_format_all assignment below.
 */
304 fd2_prog_init(struct pipe_context
*pctx
)
306 struct fd_context
*ctx
= fd_context(pctx
);
307 struct fd_program_stateobj
*prog
;
308 struct fd2_shader_stateobj
*so
;
309 struct ir2_shader_info
*info
;
310 instr_fetch_vtx_t
*instr
;
/* fragment shader create/delete hooks */
312 pctx
->create_fs_state
= fd2_fp_state_create
;
313 pctx
->delete_fs_state
= fd2_fp_state_delete
;
/* vertex shader create/delete hooks */
315 pctx
->create_vs_state
= fd2_vp_state_create
;
316 pctx
->delete_vs_state
= fd2_vp_state_delete
;
320 /* XXX maybe its possible to reuse patch_vtx_fetch somehow? */
/* solid program: compile its vertex shader against its fragment shader as variant 1 */
322 prog
= &ctx
->solid_prog
;
324 ir2_compile(prog
->vp
, 1, prog
->fp
);
/*
 * Destination swizzles as four 3-bit selectors, component 0 in the low bits
 * (the same layout patch_vtx_fetch decodes): 0xb08 = {0, 1, 4, 5} and
 * 0xa88 = {0, 1, 2, 5}. Going by the macro names, selector 4 yields
 * constant 0 and selector 5 yields constant 1.
 */
326 #define IR2_FETCH_SWIZ_XY01 0xb08
327 #define IR2_FETCH_SWIZ_XYZ1 0xa88
329 info
= &so
->variant
[1].info
;
/* solid program, fetch 0: three-float attribute from constant 26, entry 0 */
331 instr
= (instr_fetch_vtx_t
*) &info
->dwords
[info
->fetch_info
[0].offset
];
332 instr
->const_index
= 26;
333 instr
->const_index_sel
= 0;
334 instr
->format
= FMT_32_32_32_FLOAT
;
335 instr
->format_comp_all
= false;
337 instr
->num_format_all
= true;
338 instr
->dst_swiz
= IR2_FETCH_SWIZ_XYZ1
;
/* blit program 0: same procedure, but it has two fetches to patch */
340 prog
= &ctx
->blit_prog
[0];
342 ir2_compile(prog
->vp
, 1, prog
->fp
);
344 info
= &so
->variant
[1].info
;
/* blit program, fetch 0: two-float attribute from constant 26, entry 1 */
346 instr
= (instr_fetch_vtx_t
*) &info
->dwords
[info
->fetch_info
[0].offset
];
347 instr
->const_index
= 26;
348 instr
->const_index_sel
= 1;
349 instr
->format
= FMT_32_32_FLOAT
;
350 instr
->format_comp_all
= false;
352 instr
->num_format_all
= false;
353 instr
->dst_swiz
= IR2_FETCH_SWIZ_XY01
;
/* blit program, fetch 1: three-float attribute from constant 26, entry 0 */
355 instr
= (instr_fetch_vtx_t
*) &info
->dwords
[info
->fetch_info
[1].offset
];
356 instr
->const_index
= 26;
357 instr
->const_index_sel
= 0;
358 instr
->format
= FMT_32_32_32_FLOAT
;
359 instr
->format_comp_all
= false;
361 instr
->num_format_all
= false;
362 instr
->dst_swiz
= IR2_FETCH_SWIZ_XYZ1
;