#include "ilo_dev.h"
#include "ilo_state_3d.h"
#include "ilo_state_sampler.h"
+#include "ilo_state_sol.h"
#include "ilo_builder.h"
static inline void
static inline void
gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
- int render_stream,
- bool render_disable,
- int vertex_attrib_count,
- const int *buf_strides)
+ const struct ilo_state_sol *sol)
{
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
uint32_t *dw;
- int buf_mask;
ILO_DEV_ASSERT(builder->dev, 7, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);
-
- dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
- if (render_disable)
- dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;
-
- if (buf_strides) {
- buf_mask = ((bool) buf_strides[3]) << 3 |
- ((bool) buf_strides[2]) << 2 |
- ((bool) buf_strides[1]) << 1 |
- ((bool) buf_strides[0]);
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- dw[3] = buf_strides[1] << 16 | buf_strides[0];
- dw[4] = buf_strides[3] << 16 | buf_strides[1];
- }
- } else {
- buf_mask = 0;
- }
-
- if (buf_mask) {
- int read_len;
-
- dw[1] |= GEN7_SO_DW1_SO_ENABLE |
- GEN7_SO_DW1_STATISTICS;
- /* API_OPENGL */
- if (true)
- dw[1] |= GEN7_REORDER_TRAILING << GEN7_SO_DW1_REORDER_MODE__SHIFT;
- if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
- dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
-
- read_len = (vertex_attrib_count + 1) / 2;
- if (!read_len)
- read_len = 1;
-
- dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
- 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
- (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
- } else {
- dw[2] = 0;
+ /* see sol_set_gen7_3DSTATE_STREAMOUT()
+  *
+  * This function emits 3DSTATE_STREAMOUT without computing any field
+  * itself: DW1 and DW2 are copied verbatim from sol->so[0] and sol->so[1].
+  * The command is five dwords long on Gen8+ and three dwords otherwise, so
+  * DW3 and DW4 (copied from sol->so[2] and sol->so[3]) are written only on
+  * Gen8+.
+  */
+ dw[1] = sol->so[0];
+ dw[2] = sol->so[1];
+ if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+ dw[3] = sol->so[2];
+ dw[4] = sol->so[3];
}
}
static inline void
gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
- const struct pipe_stream_output_info *so_info)
+ const struct ilo_state_sol *sol)
{
/*
* Note that "DWord Length" has 9 bits for this command and the type of
* cmd_len cannot be uint8_t.
*/
uint16_t cmd_len;
- struct {
- int buf_selects;
- int decl_count;
- uint16_t decls[128];
- } streams[4];
- unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
- int hw_decl_count, i;
+ int cmd_decl_count;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 8);
- memset(streams, 0, sizeof(streams));
- memset(buf_offsets, 0, sizeof(buf_offsets));
-
- for (i = 0; i < so_info->num_outputs; i++) {
- unsigned decl, st, buf, reg, mask;
-
- st = so_info->output[i].stream;
- buf = so_info->output[i].output_buffer;
-
- /* pad with holes */
- while (buf_offsets[buf] < so_info->output[i].dst_offset) {
- int num_dwords;
-
- num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
- if (num_dwords > 4)
- num_dwords = 4;
-
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- GEN7_SO_DECL_HOLE_FLAG |
- ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
- assert(streams[st].decl_count < Elements(streams[st].decls));
- streams[st].decls[streams[st].decl_count++] = decl;
- buf_offsets[buf] += num_dwords;
- }
- assert(buf_offsets[buf] == so_info->output[i].dst_offset);
-
- reg = so_info->output[i].register_index;
- mask = ((1 << so_info->output[i].num_components) - 1) <<
- so_info->output[i].start_component;
-
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
- mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
- assert(streams[st].decl_count < Elements(streams[st].decls));
-
- streams[st].buf_selects |= 1 << buf;
- streams[st].decls[streams[st].decl_count++] = decl;
- buf_offsets[buf] += so_info->output[i].num_components;
- }
-
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
- hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
- streams[2].decl_count, streams[3].decl_count);
+ cmd_decl_count = sol->decl_count;
} else {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 201:
* whenever this command is issued. The "Num Entries [n]" fields
* still contain the actual numbers of valid decls."
*/
- hw_decl_count = 128;
+ cmd_decl_count = 128;
}
- cmd_len = 3 + 2 * hw_decl_count;
+ cmd_len = 3 + 2 * cmd_decl_count;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
- dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
- streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
- streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
- streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
- dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
- streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
- streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
- streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
- dw += 3;
-
- for (i = 0; i < hw_decl_count; i++) {
- dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
- dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
- dw += 2;
+ /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */
+ dw[1] = sol->so[4];
+ dw[2] = sol->so[5];
+ memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count);
+
+ if (sol->decl_count < cmd_decl_count) {
+ memset(&dw[3 + 2 * sol->decl_count], 0, sizeof(sol->decl[0]) *
+ cmd_decl_count - sol->decl_count);
}
}
struct ilo_ve_state ve;
struct pipe_draw_info draw;
+ struct ilo_state_sol sol;
+
struct ilo_state_viewport vp;
uint32_t vp_data[20];
blitter->draw.mode = ILO_PRIM_RECTANGLES;
blitter->draw.count = 3;
+ ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false);
+
/**
* From the Haswell PRM, volume 7, page 615:
*
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- const struct pipe_stream_output_info *so_info;
+ const struct ilo_state_sol *sol;
const struct ilo_shader_state *shader;
bool dirty_sh = false;
dirty_sh = DIRTY(VS);
}
- so_info = ilo_shader_get_kernel_so_info(shader);
+ sol = ilo_shader_get_kernel_sol(shader);
/* 3DSTATE_SO_BUFFER */
if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
vec->so.enabled) {
+ const struct pipe_stream_output_info *so_info;
int i;
+ so_info = ilo_shader_get_kernel_so_info(shader);
+
for (i = 0; i < vec->so.count; i++) {
const int stride = so_info->stride[i] * 4; /* in bytes */
/* 3DSTATE_SO_DECL_LIST */
if (dirty_sh && vec->so.enabled)
- gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
-
- /* 3DSTATE_STREAMOUT */
- if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
- const int output_count = ilo_shader_get_kernel_param(shader,
- ILO_KERNEL_OUTPUT_COUNT);
- int buf_strides[4] = { 0, 0, 0, 0 };
- int i;
+ gen7_3DSTATE_SO_DECL_LIST(r->builder, sol);
- for (i = 0; i < vec->so.count; i++)
- buf_strides[i] = so_info->stride[i] * 4;
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 196-197:
+ *
+ * "Anytime the SOL unit MMIO registers or non-pipeline state are
+ * written, the SOL unit needs to receive a pipeline state update with
+ * SOL unit dirty state for information programmed in MMIO/NP to get
+ * loaded into the SOL unit.
+ *
+ * The SOL unit incorrectly double buffers MMIO/NP registers and only
+ * moves them into the design for usage when control topology is
+ * received with the SOL unit dirty state.
+ *
+ * If the state does not change, need to resend the same state.
+ *
+ * Because of corruption, software must flush the whole fixed function
+ * pipeline when 3DSTATE_STREAMOUT changes state."
+ *
+ * The first and fourth paragraphs are gone on Gen7.5+.
+ */
- gen7_3DSTATE_STREAMOUT(r->builder, 0,
- vec->rasterizer->state.rasterizer_discard,
- output_count, buf_strides);
- }
+ /* 3DSTATE_STREAMOUT */
+ gen7_3DSTATE_STREAMOUT(r->builder, sol);
}
static void
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
gen7_disable_3DSTATE_GS(r->builder);
- gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
+ gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
}
static void
-copy_so_info(struct ilo_shader *sh,
- const struct pipe_stream_output_info *so_info)
+init_sol(struct ilo_shader *kernel,
+ const struct ilo_dev *dev,
+ const struct pipe_stream_output_info *so_info,
+ bool rasterizer_discard) /* initializes kernel->sol from so_info; per-stream
+  * decls are built on the stack and handed to
+  * ilo_state_sol_init() through info */
{
- unsigned i, attr;
+ struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
+ unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
+ struct ilo_state_sol_info info;
+ unsigned i;
- if (!so_info->num_outputs)
+ if (!so_info->num_outputs) { /* no stream outputs: use the disabled-state initializer and skip decl building */
+ ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.data = kernel->sol_data;
+ info.data_size = sizeof(kernel->sol_data);
+ info.sol_enable = true;
+ info.stats_enable = true;
+ info.tristrip_reorder = GEN7_REORDER_TRAILING;
+ info.render_disable = rasterizer_discard;
+ info.render_stream = 0;
+
+ for (i = 0; i < 4; i++) {
+ info.buffer_strides[i] = so_info->stride[i] * 4; /* stride[] scaled by 4 (presumably dwords to bytes -- confirm) */
- sh->so_info = *so_info;
+ info.streams[i].cv_vue_attr_count = kernel->out.count;
+ info.streams[i].decls = decls[i];
+ }
+ memset(decls, 0, sizeof(decls));
+ memset(buf_offsets, 0, sizeof(buf_offsets));
for (i = 0; i < so_info->num_outputs; i++) {
+ const unsigned stream = so_info->output[i].stream;
+ const unsigned buffer = so_info->output[i].output_buffer;
+ struct ilo_state_sol_decl_info *decl;
+ unsigned attr;
+
/* figure out which attribute is sourced */
- for (attr = 0; attr < sh->out.count; attr++) {
- const int reg_idx = sh->out.register_indices[attr];
+ for (attr = 0; attr < kernel->out.count; attr++) {
+ const int reg_idx = kernel->out.register_indices[attr];
if (reg_idx == so_info->output[i].register_index)
break;
}
-
- if (attr < sh->out.count) {
- sh->so_info.output[i].register_index = attr;
- }
- else {
+ if (attr >= kernel->out.count) {
assert(!"stream output an undefined register");
- sh->so_info.output[i].register_index = 0;
+ attr = 0;
}
+ if (info.streams[stream].vue_read_count < attr + 1) /* keep the highest sourced attribute index, plus one, per stream */
+ info.streams[stream].vue_read_count = attr + 1;
+
+ /* pad with holes first: while the buffer's running offset is short of
+  * this output's dst_offset, append hole decls covering at most 4
+  * dwords each, so a larger gap takes several holes
+  */
+ while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
+ int num_dwords;
+
+ num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
+ if (num_dwords > 4)
+ num_dwords = 4;
+
+ assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+ decl = &decls[stream][info.streams[stream].decl_count];
+
+ decl->attr = 0;
+ decl->is_hole = true;
+ decl->component_base = 0;
+ decl->component_count = num_dwords;
+ decl->buffer = buffer;
+
+ info.streams[stream].decl_count++;
+ buf_offsets[buffer] += num_dwords;
+ }
+ assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
+
+ assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+ decl = &decls[stream][info.streams[stream].decl_count];
+
+ decl->attr = attr;
+ decl->is_hole = false;
/* PSIZE is at W channel */
- if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
+ if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
assert(so_info->output[i].start_component == 0);
assert(so_info->output[i].num_components == 1);
- sh->so_info.output[i].start_component = 3;
+ decl->component_base = 3;
+ decl->component_count = 1;
+ } else {
+ decl->component_base = so_info->output[i].start_component;
+ decl->component_count = so_info->output[i].num_components;
}
+ decl->buffer = buffer;
+
+ info.streams[stream].decl_count++;
+ buf_offsets[buffer] += so_info->output[i].num_components;
}
+
+ ilo_state_sol_init(&kernel->sol, dev, &info);
}
/**
ilo_shader_state_add_variant(struct ilo_shader_state *state,
const struct ilo_shader_variant *variant)
{
+ bool rasterizer_discard = false;
struct ilo_shader *sh;
switch (state->info.type) {
case PIPE_SHADER_VERTEX:
sh = ilo_shader_compile_vs(state, variant);
+ rasterizer_discard = variant->u.vs.rasterizer_discard;
break;
case PIPE_SHADER_FRAGMENT:
sh = ilo_shader_compile_fs(state, variant);
break;
case PIPE_SHADER_GEOMETRY:
sh = ilo_shader_compile_gs(state, variant);
+ rasterizer_discard = variant->u.gs.rasterizer_discard;
break;
case PIPE_SHADER_COMPUTE:
sh = ilo_shader_compile_cs(state, variant);
sh->variant = *variant;
- copy_so_info(sh, &state->info.stream_output);
+ init_sol(sh, state->info.dev, &state->info.stream_output,
+ rasterizer_discard);
ilo_shader_state_add_shader(state, sh);
*/
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
+{
+ return &shader->info.stream_output; /* read from the shader state's info; shader->shader is not consulted */
+}
+
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
{
const struct ilo_shader *kernel = shader->shader;
assert(kernel);
- return &kernel->so_info;
+ return &kernel->sol; /* SOL state stored in the kernel that shader->shader points to; asserted non-NULL above */
}
/**
struct ilo_shader_cache;
struct ilo_shader_state;
struct ilo_shader_cso;
+struct ilo_state_sol;
struct ilo_state_vector;
struct ilo_shader_cache *
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader);
+
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
#include "core/ilo_state_cc.h"
#include "core/ilo_state_raster.h"
#include "core/ilo_state_sampler.h"
+#include "core/ilo_state_sol.h"
#include "core/ilo_state_surface.h"
#include "core/ilo_state_viewport.h"
#include "core/ilo_state_zs.h"
#ifndef ILO_SHADER_INTERNAL_H
#define ILO_SHADER_INTERNAL_H
+#include "core/ilo_state_sol.h"
+
#include "ilo_common.h"
#include "ilo_state.h"
#include "ilo_shader.h"
bool stream_output;
int svbi_post_inc;
- struct pipe_stream_output_info so_info;
+
+ uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2];
+ struct ilo_state_sol sol;
/* for VS stream output / rasterizer discard */
int gs_offsets[3];