return needs_border;
}
+static void
+emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+{
+ unsigned count = util_last_bit(so->enabled_mask);
+
+ if (count == 0)
+ return;
+
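+ /* The SSBO state appears to be loaded with three CP_LOAD_STATE4
+ * packets, distinguished by STATE_TYPE. Type 0 looks like a 4-dword
+ * descriptor per buffer, with the buffer address in the first two
+ * dwords; the remaining dwords are not understood yet.
+ */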
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(count));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_shader_buffer *buf = &so->sb[i];
+ if (buf->buffer) {
+ struct fd_resource *rsc = fd_resource(buf->buffer);
+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
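+ /* State type 1 seems to hold per-buffer sizes, with buffer_size
+ * shifted into the upper half of the first dword.
+ */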
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(count));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_shader_buffer *buf = &so->sb[i];
+
+ /* TODO: the buffer offset is probably encoded somewhere in here */
+ OUT_RING(ring, (buf->buffer_size << 16));
+ OUT_RING(ring, 0x00000000);
+ }
+
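+ /* State type 2 appears to be the GPU address of each buffer,
+ * two dwords (lo/hi) per SSBO.
+ */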
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(count));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_shader_buffer *buf = &so->sb[i];
+ if (buf->buffer) {
+ struct fd_resource *rsc = fd_resource(buf->buffer);
+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+}
+
void
fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
{
if (needs_border)
emit_border_color(ctx, ring);
+
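+ /* For now only the fragment-stage SSBOs are emitted here; this
+ * matches the screen caps, which advertise SSBOs only for FS and CS.
+ */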
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
+ emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
}
/* emit setup at begin of new cmdstream buffer (don't rely on previous
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
- OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));
+ OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
+ COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
+ COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
+ COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
+ COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
+ COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));
OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
return 120;
return is_ir3(screen) ? 140 : 120;
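+ /* minimum alignment for SSBO binding offsets (what GL exposes as
+ * GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT):
+ */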
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ if (is_a5xx(screen))
+ return 4;
+ return 0;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
static int
fd_screen_get_shader_param(struct pipe_screen *pscreen,
- enum pipe_shader_type shader,
+ enum pipe_shader_type shader,
enum pipe_shader_cap param)
{
struct fd_screen *screen = fd_screen(pscreen);
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ if (is_a5xx(screen)) {
+ /* a5xx (and a4xx, for that matter) has one state-block
+ * for compute-shader SSBOs and another that is shared
+ * by VS/HS/DS/GS/FS, so to keep things simple for now
+ * just advertise SSBOs for FS and CS. We could possibly
+ * do what blob does, and partition the space for
+ * VS/HS/DS/GS/FS. The blob advertises:
+ *
+ * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24
+ * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24
+ *
+ * I think a static partitioning like that would let us
+ * avoid having to patch shaders for actual SSBO indexes.
+ */
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
+ return 24;
+ default:
+ return 0;
+ }
+ }
+ return 0;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: