if (vb->buffer && vb->stride <= 2048) {
const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
const uint32_t start_offset = vb->buffer_offset;
- const uint32_t end_offset = buf->bo->get_size(buf->bo) - 1;
+ /*
+ * As noted in ilo_translate_format(), we treat some 3-component
+ * formats as 4-component formats to work around hardware
+ * limitations. Imagine the case where the vertex buffer holds a
+ * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
+ * The hardware would not be able to fetch it because the vertex
+ * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
+ * and that takes at least 8 bytes.
+ *
+ * For the workaround to work, we query the physical size, which is
+ * page aligned, to calculate end_offset so that the last vertex has
+ * a better chance to be fetched.
+ */
+ const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
static void
gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
- const struct pipe_index_buffer *ib,
+ const struct ilo_ib_state *ib,
bool enable_cut_index,
struct ilo_cp *cp)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
const uint8_t cmd_len = 3;
- const struct ilo_buffer *buf = ilo_buffer(ib->buffer);
+ const struct ilo_buffer *buf = ilo_buffer(ib->resource);
uint32_t start_offset, end_offset;
int format;
if (!buf)
return;
- format = gen6_translate_index_size(ib->index_size);
+ format = gen6_translate_index_size(ib->state.index_size);
- start_offset = ib->offset;
- /* start_offset must be aligned to index size */
- if (start_offset % ib->index_size) {
- /* TODO need a u_upload_mgr to upload the IB to an aligned address */
- assert(!"unaligned index buffer offset");
- start_offset -= start_offset % ib->index_size;
- }
+ /*
+ * set start_offset to 0 here and adjust pipe_draw_info::start with
+ * ib->draw_start_offset in 3DPRIMITIVE
+ */
+ start_offset = 0;
+ end_offset = buf->bo_size;
- /* end_offset must also be aligned */
- end_offset = buf->bo->get_size(buf->bo);
- end_offset -= (end_offset % ib->index_size);
- /* it is inclusive */
- end_offset -= 1;
+ /* end_offset must also be aligned and is inclusive */
+ end_offset -= (end_offset % ib->state.index_size);
+ end_offset--;
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2) |
ilo_cp_end(cp);
}
-static void
-gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
- const struct ilo_shader *vs,
- int num_samplers,
- struct ilo_cp *cp)
+void
+ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *vs,
+ struct ilo_shader_cso *cso)
{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
- const uint8_t cmd_len = 6;
+ int start_grf, vue_read_len, max_threads;
uint32_t dw2, dw4, dw5;
- int vue_read_len, max_threads;
ILO_GPE_VALID_GEN(dev, 6, 7);
- if (!vs) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
+ start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
+ vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 135:
* "It is UNDEFINED to set this field to 0 indicating no Vertex URB
* data to be read and passed to the thread."
*/
- vue_read_len = (vs->in.count + 1) / 2;
+ vue_read_len = (vue_read_len + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
break;
}
- dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
- if (false)
- dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
+ dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
- dw4 = vs->in.start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
+ dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
else
dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
+ STATIC_ASSERT(Elements(cso->payload) >= 3);
+ cso->payload[0] = dw2;
+ cso->payload[1] = dw4;
+ cso->payload[2] = dw5;
+}
+
+static void
+gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *vs,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
+ const uint8_t cmd_len = 6;
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ if (!vs) {
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+ return;
+ }
+
+ cso = ilo_shader_get_kernel_cso(vs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+
+ dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
+
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, vs->cache_offset);
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
ilo_cp_write(cp, dw2);
ilo_cp_write(cp, 0); /* scratch */
ilo_cp_write(cp, dw4);
ilo_cp_end(cp);
}
+/**
+ * Pre-compute the shader-dependent dwords (DW2, DW4, DW5 and DW6) of the
+ * GEN6 3DSTATE_GS command and store them in cso->payload[0..3].
+ *
+ * The shader type decides which kernel parameters provide the dispatch start
+ * register and the URB read length: a geometry shader uses its URB data
+ * start register and input count, any other shader uses its GEN6
+ * stream-output start register and output count.
+ */
+void
+ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
+                         const struct ilo_shader_state *gs,
+                         struct ilo_shader_cso *cso)
+{
+   int grf, read_len, thread_limit;
+   uint32_t word2, word4, word5, word6;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   if (ilo_shader_get_type(gs) != PIPE_SHADER_GEOMETRY) {
+      /* not a geometry shader: use the GEN6 stream-output parameters */
+      grf = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_VS_GEN6_SO_START_REG);
+
+      read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
+   }
+   else {
+      grf = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_URB_DATA_START_REG);
+
+      read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
+    *
+    *     "Specifies the amount of URB data read and passed in the thread
+    *      payload for each Vertex URB entry, in 256-bit register increments.
+    *
+    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
+    *      0 indicating no Vertex URB data to be read and passed to the
+    *      thread."
+    *
+    * Hence the count is halved (rounding up) and a result of zero is bumped
+    * to one.
+    */
+   read_len = (read_len + 1) / 2;
+   if (read_len == 0)
+      read_len = 1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
+    *
+    *     "Maximum Number of Threads valid range is [0,27] when Rendering
+    *      Enabled bit is set."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
+    *
+    *     "Programming Note: If the GS stage is enabled, software must always
+    *      allocate at least one GS URB Entry. This is true even if the GS
+    *      thread never needs to output vertices to the pipeline, e.g., when
+    *      only performing stream output. This is an artifact of the need to
+    *      pass the GS thread an initial destination URB handle."
+    *
+    * As such, we always enable rendering, and limit the number of threads:
+    * GT2 has a maximum of 60 but is limited to 28; otherwise the maximum is
+    * 24 but it is limited to 21 (see brwCreateContext()).
+    */
+   thread_limit = (dev->gt == 2) ? 28 : 21;
+
+   word2 = GEN6_GS_SPF_MODE;
+
+   word4 = grf << GEN6_GS_DISPATCH_START_GRF_SHIFT |
+           0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
+           read_len << GEN6_GS_URB_READ_LENGTH_SHIFT;
+
+   word5 = GEN6_GS_RENDERING_ENABLE |
+           GEN6_GS_SO_STATISTICS_ENABLE |
+           GEN6_GS_STATISTICS_ENABLE |
+           (thread_limit - 1) << GEN6_GS_MAX_THREADS_SHIFT;
+
+   /*
+    * GEN6_GS_REORDER is deliberately left out: it reorders triangle strips
+    * according to D3D rules (triangle 2N+1 uses vertices (2N+1, 2N+3, 2N+2)),
+    * instead of GL rules (triangle 2N+1 uses vertices (2N+2, 2N+1, 2N+3)).
+    */
+   word6 = GEN6_GS_ENABLE;
+
+   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
+      word6 |= GEN6_GS_DISCARD_ADJACENCY;
+
+   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
+      const uint32_t post_inc =
+         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
+
+      word6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
+
+      /* post-increment is enabled only for a non-zero increment */
+      if (post_inc) {
+         word6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE;
+         word6 |= post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
+      }
+   }
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = word2;
+   cso->payload[1] = word4;
+   cso->payload[2] = word5;
+   cso->payload[3] = word6;
+}
+
static void
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader *gs,
- const struct ilo_shader *vs,
- uint32_t vs_offset,
+ const struct ilo_shader_state *gs,
+ const struct ilo_shader_state *vs,
+ int verts_per_prim,
struct ilo_cp *cp)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
const uint8_t cmd_len = 7;
uint32_t dw1, dw2, dw4, dw5, dw6;
- int i;
ILO_GPE_VALID_GEN(dev, 6, 6);
- if (!gs && (!vs || !vs->stream_output)) {
- dw1 = 0;
- dw2 = 0;
- dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
- dw5 = GEN6_GS_STATISTICS_ENABLE;
- dw6 = 0;
- }
- else {
- int max_threads, vue_read_len;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 154:
- *
- * "Maximum Number of Threads valid range is [0,27] when Rendering
- * Enabled bit is set."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 173:
- *
- * "Programming Note: If the GS stage is enabled, software must
- * always allocate at least one GS URB Entry. This is true even if
- * the GS thread never needs to output vertices to the pipeline,
- * e.g., when only performing stream output. This is an artifact of
- * the need to pass the GS thread an initial destination URB
- * handle."
- *
- * As such, we always enable rendering, and limit the number of threads.
- */
- if (dev->gt == 2) {
- /* maximum is 60, but limited to 28 */
- max_threads = 28;
- }
- else {
- /* maximum is 24, but limited to 21 (see brwCreateContext()) */
- max_threads = 21;
- }
+ if (gs) {
+ const struct ilo_shader_cso *cso;
- if (max_threads > 28)
- max_threads = 28;
+ dw1 = ilo_shader_get_kernel_offset(gs);
- dw2 = GEN6_GS_SPF_MODE;
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+ dw6 = cso->payload[3];
+ }
+ else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
+ struct ilo_shader_cso cso;
+ enum ilo_kernel_param param;
- dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_SO_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE;
+ switch (verts_per_prim) {
+ case 1:
+ param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+ break;
+ case 2:
+ param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+ break;
+ default:
+ param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+ break;
+ }
- /*
- * we cannot make use of GEN6_GS_REORDER because it will reorder
- * triangle strips according to D3D rules (triangle 2N+1 uses vertices
- * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
- * (2N+2, 2N+1, 2N+3)).
- */
- dw6 = GEN6_GS_ENABLE;
-
- if (gs) {
- /* VS ouputs must match GS inputs */
- assert(gs->in.count == vs->out.count);
- for (i = 0; i < gs->in.count; i++) {
- assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
- assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
- }
+ dw1 = ilo_shader_get_kernel_offset(vs) +
+ ilo_shader_get_kernel_param(vs, param);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 153:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read
- * Length) to 0 indicating no Vertex URB data to be read and
- * passed to the thread."
- */
- vue_read_len = (gs->in.count + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- dw1 = gs->cache_offset;
- dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
- 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
- gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
-
- if (gs->in.discard_adj)
- dw6 |= GEN6_GS_DISCARD_ADJACENCY;
-
- if (gs->stream_output) {
- dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
- if (gs->svbi_post_inc) {
- dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
- gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
- }
- }
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 153:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read
- * Length) to 0 indicating no Vertex URB data to be read and
- * passed to the thread."
- */
- vue_read_len = (vs->out.count + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- dw1 = vs_offset;
- dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
- 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
- vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
-
- if (vs->in.discard_adj)
- dw6 |= GEN6_GS_DISCARD_ADJACENCY;
-
- dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
- if (vs->svbi_post_inc) {
- dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
- vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
- }
- }
+ /* cannot use VS's CSO */
+ ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
+ dw2 = cso.payload[0];
+ dw4 = cso.payload[1];
+ dw5 = cso.payload[2];
+ dw6 = cso.payload[3];
+ }
+ else {
+ dw1 = 0;
+ dw2 = 0;
+ dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
+ dw5 = GEN6_GS_STATISTICS_ENABLE;
+ dw6 = 0;
}
ilo_cp_begin(cp, cmd_len);
ilo_cp_end(cp);
}
-static void
-gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
- const struct pipe_rasterizer_state *rasterizer,
- bool has_linear_interp,
- bool enable_guardband,
- int num_viewports,
- struct ilo_cp *cp)
+void
+ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
+ const struct pipe_rasterizer_state *state,
+ struct ilo_rasterizer_clip *clip)
{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
- const uint8_t cmd_len = 4;
uint32_t dw1, dw2, dw3;
ILO_GPE_VALID_GEN(dev, 6, 7);
- if (!rasterizer) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
dw1 = GEN6_CLIP_STATISTICS_ENABLE;
if (dev->gen >= ILO_GEN(7)) {
dw1 |= 0 << 19 |
GEN7_CLIP_EARLY_CULL;
- if (rasterizer->front_ccw)
+ if (state->front_ccw)
dw1 |= GEN7_CLIP_WINDING_CCW;
- switch (rasterizer->cull_face) {
+ switch (state->cull_face) {
case PIPE_FACE_NONE:
dw1 |= GEN7_CLIP_CULLMODE_NONE;
break;
dw2 = GEN6_CLIP_ENABLE |
GEN6_CLIP_XY_TEST |
- rasterizer->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
+ state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
GEN6_CLIP_MODE_NORMAL;
- if (rasterizer->clip_halfz)
+ if (state->clip_halfz)
dw2 |= GEN6_CLIP_API_D3D;
else
dw2 |= GEN6_CLIP_API_OGL;
- if (rasterizer->depth_clip)
+ if (state->depth_clip)
dw2 |= GEN6_CLIP_Z_TEST;
+ if (state->flatshade_first) {
+ dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
+ 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
+ 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+ }
+ else {
+ dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
+ 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
+ 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+ }
+
+ dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+ 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
+
+ clip->payload[0] = dw1;
+ clip->payload[1] = dw2;
+ clip->payload[2] = dw3;
+
+ clip->can_enable_guardband = true;
+
/*
* There are several reasons that guard band test should be disabled
*
- * - when the renderer does not perform 2D clipping
* - GL wide points (to avoid partially visibie object)
* - GL wide or AA lines (to avoid partially visibie object)
*/
- if (enable_guardband && true /* API_GL */) {
- if (rasterizer->point_size_per_vertex || rasterizer->point_size > 1.0f)
- enable_guardband = false;
- if (rasterizer->line_smooth || rasterizer->line_width > 1.0f)
- enable_guardband = false;
- }
+ if (state->point_size_per_vertex || state->point_size > 1.0f)
+ clip->can_enable_guardband = false;
+ if (state->line_smooth || state->line_width > 1.0f)
+ clip->can_enable_guardband = false;
+}
- if (enable_guardband)
- dw2 |= GEN6_CLIP_GB_TEST;
+static void
+gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ bool enable_guardband,
+ int num_viewports,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
+ const uint8_t cmd_len = 4;
+ uint32_t dw1, dw2, dw3;
- if (has_linear_interp)
- dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+ if (rasterizer) {
+ int interps;
- if (rasterizer->flatshade_first) {
- dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
- 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
- 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+ dw1 = rasterizer->clip.payload[0];
+ dw2 = rasterizer->clip.payload[1];
+ dw3 = rasterizer->clip.payload[2];
+
+ if (enable_guardband && rasterizer->clip.can_enable_guardband)
+ dw2 |= GEN6_CLIP_GB_TEST;
+
+ interps = (fs) ? ilo_shader_get_kernel_param(fs,
+ ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
+
+ if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
+ 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
+ 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
+ dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+
+ dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
+ (num_viewports - 1);
}
else {
- dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
- 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
- 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+ dw1 = 0;
+ dw2 = 0;
+ dw3 = 0;
}
- dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
- 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
- GEN6_CLIP_FORCE_ZERO_RTAINDEX |
- (num_viewports - 1);
-
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
ilo_cp_write(cp, dw1);
ilo_cp_end(cp);
}
-/**
- * Fill in DW2 to DW7 of 3DSTATE_SF.
- */
void
-ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
- const struct pipe_rasterizer_state *rasterizer,
- int num_samples,
- enum pipe_format depth_format,
- bool separate_stencil,
- uint32_t *dw, int num_dwords)
+ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
+ const struct pipe_rasterizer_state *state,
+ struct ilo_rasterizer_sf *sf)
{
float offset_const, offset_scale, offset_clamp;
- int format, line_width, point_width;
+ int line_width, point_width;
+ uint32_t dw1, dw2, dw3;
ILO_GPE_VALID_GEN(dev, 6, 7);
- assert(num_dwords == 6);
-
- if (!rasterizer) {
- dw[0] = 0;
- dw[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = 0;
- dw[5] = 0;
-
- return;
- }
/*
* Scale the constant term. The minimum representable value used by the HW
* is not large enouch to be the minimum resolvable difference.
*/
- offset_const = rasterizer->offset_units * 2.0f;
-
- offset_scale = rasterizer->offset_scale;
- offset_clamp = rasterizer->offset_clamp;
-
- if (separate_stencil) {
- switch (depth_format) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- depth_format = PIPE_FORMAT_Z24X8_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- depth_format = PIPE_FORMAT_Z32_FLOAT;;
- break;
- case PIPE_FORMAT_S8_UINT:
- depth_format = PIPE_FORMAT_NONE;
- break;
- default:
- break;
- }
- }
-
- format = gen6_translate_depth_format(depth_format);
- /* FLOAT surface is assumed when there is no depth buffer */
- if (format < 0)
- format = BRW_DEPTHFORMAT_D32_FLOAT;
-
- /*
- * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
- * pixels in the minor direction. We have to make the lines slightly
- * thicker, 0.5 pixel on both sides, so that they intersect that many
- * pixels are considered into the lines.
- *
- * Line width is in U3.7.
- */
- line_width = (int) ((rasterizer->line_width +
- (float) rasterizer->line_smooth) * 128.0f + 0.5f);
- line_width = CLAMP(line_width, 0, 1023);
+ offset_const = state->offset_units * 2.0f;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 251:
- *
- * "Software must not program a value of 0.0 when running in
- * MSRASTMODE_ON_xxx modes - zero-width lines are not available when
- * multisampling rasterization is enabled."
- */
- if (rasterizer->multisample) {
- if (!line_width)
- line_width = 128; /* 1.0f */
- }
- else if (line_width == 128 && !rasterizer->line_smooth) {
- /* use GIQ rules */
- line_width = 0;
- }
-
- /* in U8.3 */
- point_width = (int) (rasterizer->point_size * 8.0f + 0.5f);
- point_width = CLAMP(point_width, 1, 2047);
+ offset_scale = state->offset_scale;
+ offset_clamp = state->offset_clamp;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 248:
* should be cleared if clipping is disabled or Statistics Enable in
* CLIP_STATE is clear."
*/
- dw[0] = GEN6_SF_STATISTICS_ENABLE |
- GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+ dw1 = GEN6_SF_STATISTICS_ENABLE |
+ GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
/* XXX GEN6 path seems to work fine for GEN7 */
if (false && dev->gen >= ILO_GEN(7)) {
- dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 258:
*
* bias (Slope, Bias) values are used. Setting this bit may have
* some degradation of performance for some workloads."
*/
- if (rasterizer->offset_tri ||
- rasterizer->offset_line ||
- rasterizer->offset_point) {
+ if (state->offset_tri || state->offset_line || state->offset_point) {
/* XXX need to scale offset_const according to the depth format */
- dw[0] |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
+ dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
- dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
- GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
- GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
+ GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
+ GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
}
else {
offset_const = 0.0f;
}
}
else {
- if (dev->gen >= ILO_GEN(7))
- dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
-
- if (rasterizer->offset_tri)
- dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
- if (rasterizer->offset_line)
- dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
- if (rasterizer->offset_point)
- dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+ if (state->offset_tri)
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+ if (state->offset_line)
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+ if (state->offset_point)
+ dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
}
- switch (rasterizer->fill_front) {
+ switch (state->fill_front) {
case PIPE_POLYGON_MODE_FILL:
- dw[0] |= GEN6_SF_FRONT_SOLID;
+ dw1 |= GEN6_SF_FRONT_SOLID;
break;
case PIPE_POLYGON_MODE_LINE:
- dw[0] |= GEN6_SF_FRONT_WIREFRAME;
+ dw1 |= GEN6_SF_FRONT_WIREFRAME;
break;
case PIPE_POLYGON_MODE_POINT:
- dw[0] |= GEN6_SF_FRONT_POINT;
+ dw1 |= GEN6_SF_FRONT_POINT;
break;
}
- switch (rasterizer->fill_back) {
+ switch (state->fill_back) {
case PIPE_POLYGON_MODE_FILL:
- dw[0] |= GEN6_SF_BACK_SOLID;
+ dw1 |= GEN6_SF_BACK_SOLID;
break;
case PIPE_POLYGON_MODE_LINE:
- dw[0] |= GEN6_SF_BACK_WIREFRAME;
+ dw1 |= GEN6_SF_BACK_WIREFRAME;
break;
case PIPE_POLYGON_MODE_POINT:
- dw[0] |= GEN6_SF_BACK_POINT;
+ dw1 |= GEN6_SF_BACK_POINT;
break;
}
- if (rasterizer->front_ccw)
- dw[0] |= GEN6_SF_WINDING_CCW;
+ if (state->front_ccw)
+ dw1 |= GEN6_SF_WINDING_CCW;
- dw[1] = 0;
+ dw2 = 0;
- if (rasterizer->line_smooth) {
+ if (state->line_smooth) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 251:
*
*
* TODO We do not check those yet.
*/
- dw[1] |= GEN6_SF_LINE_AA_ENABLE |
- GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+ dw2 |= GEN6_SF_LINE_AA_ENABLE |
+ GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
- switch (rasterizer->cull_face) {
+ switch (state->cull_face) {
case PIPE_FACE_NONE:
- dw[1] |= GEN6_SF_CULL_NONE;
+ dw2 |= GEN6_SF_CULL_NONE;
break;
case PIPE_FACE_FRONT:
- dw[1] |= GEN6_SF_CULL_FRONT;
+ dw2 |= GEN6_SF_CULL_FRONT;
break;
case PIPE_FACE_BACK:
- dw[1] |= GEN6_SF_CULL_BACK;
+ dw2 |= GEN6_SF_CULL_BACK;
break;
case PIPE_FACE_FRONT_AND_BACK:
- dw[1] |= GEN6_SF_CULL_BOTH;
+ dw2 |= GEN6_SF_CULL_BOTH;
break;
}
- dw[1] |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
+ /*
+ * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
+ * pixels in the minor direction. We have to make the lines slightly
+ * thicker, 0.5 pixel on both sides, so that they intersect that many
+ * pixels are considered into the lines.
+ *
+ * Line width is in U3.7.
+ */
+ line_width = (int) ((state->line_width +
+ (float) state->line_smooth) * 128.0f + 0.5f);
+ line_width = CLAMP(line_width, 0, 1023);
+
+ if (line_width == 128 && !state->line_smooth) {
+ /* use GIQ rules */
+ line_width = 0;
+ }
- if (rasterizer->scissor)
- dw[1] |= GEN6_SF_SCISSOR_ENABLE;
+ dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
- if (num_samples > 1 && rasterizer->multisample)
- dw[1] |= GEN6_SF_MSRAST_ON_PATTERN;
+ if (state->scissor)
+ dw2 |= GEN6_SF_SCISSOR_ENABLE;
- dw[2] = GEN6_SF_LINE_AA_MODE_TRUE |
- GEN6_SF_VERTEX_SUBPIXEL_8BITS;
+ dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
+ GEN6_SF_VERTEX_SUBPIXEL_8BITS;
- if (rasterizer->line_last_pixel)
- dw[2] |= 1 << 31;
+ if (state->line_last_pixel)
+ dw3 |= 1 << 31;
- if (rasterizer->flatshade_first) {
- dw[2] |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
- 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
- 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
+ if (state->flatshade_first) {
+ dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
+ 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
+ 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
}
else {
- dw[2] |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
- 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
- 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
+ dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
+ 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
+ 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
}
- if (!rasterizer->point_size_per_vertex)
- dw[2] |= GEN6_SF_USE_STATE_POINT_WIDTH;
+ if (!state->point_size_per_vertex)
+ dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
- dw[2] |= point_width;
+ /* in U8.3 */
+ point_width = (int) (state->point_size * 8.0f + 0.5f);
+ point_width = CLAMP(point_width, 1, 2047);
- dw[3] = fui(offset_const);
- dw[4] = fui(offset_scale);
- dw[5] = fui(offset_clamp);
+ dw3 |= point_width;
+
+ STATIC_ASSERT(Elements(sf->payload) >= 6);
+ sf->payload[0] = dw1;
+ sf->payload[1] = dw2;
+ sf->payload[2] = dw3;
+ sf->payload[3] = fui(offset_const);
+ sf->payload[4] = fui(offset_scale);
+ sf->payload[5] = fui(offset_clamp);
+
+ if (state->multisample) {
+ sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+ *
+ * "Software must not program a value of 0.0 when running in
+ * MSRASTMODE_ON_xxx modes - zero-width lines are not available
+ * when multisampling rasterization is enabled."
+ */
+ if (!line_width) {
+ line_width = 128; /* 1.0f */
+
+ sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
+ }
+ }
+ else {
+ sf->dw_msaa = 0;
+ }
+}
+
+/**
+ * Fill in DW2 to DW7 of 3DSTATE_SF.
+ *
+ * When rasterizer is NULL, the dwords are zeroed, except that multisample
+ * rasterization is turned on in the second dword when num_samples is greater
+ * than 1.  Otherwise the dwords pre-computed in the rasterizer's SF state are
+ * copied, the MSAA-only bits are merged in when num_samples is greater than
+ * 1, and, on GEN7 and later, the depth buffer surface format derived from
+ * depth_format is added to the first dword.
+ *
+ * payload_len is the number of dwords payload can hold; it must equal the
+ * size of the pre-computed SF payload (6 dwords are written on the NULL
+ * path).
+ */
+void
+ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
+                                    const struct ilo_rasterizer_state *rasterizer,
+                                    int num_samples,
+                                    enum pipe_format depth_format,
+                                    uint32_t *payload, unsigned payload_len)
+{
+   /*
+    * Test the rasterizer pointer itself.  The address of its sf member is
+    * not a usable NULL check, and forming it from a NULL rasterizer is
+    * undefined, which would leave the fallback branch below unreachable.
+    */
+   if (rasterizer) {
+      const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+      assert(payload_len == Elements(sf->payload));
+
+      memcpy(payload, sf->payload, sizeof(sf->payload));
+
+      /* the MSAA bits only apply when the target is really multisampled */
+      if (num_samples > 1)
+         payload[1] |= sf->dw_msaa;
+
+      if (dev->gen >= ILO_GEN(7)) {
+         int format;
+
+         /* separate stencil */
+         switch (depth_format) {
+         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+            depth_format = PIPE_FORMAT_Z24X8_UNORM;
+            break;
+         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+            depth_format = PIPE_FORMAT_Z32_FLOAT;
+            break;
+         case PIPE_FORMAT_S8_UINT:
+            depth_format = PIPE_FORMAT_NONE;
+            break;
+         default:
+            break;
+         }
+
+         format = gen6_translate_depth_format(depth_format);
+         /* FLOAT surface is assumed when there is no depth buffer */
+         if (format < 0)
+            format = BRW_DEPTHFORMAT_D32_FLOAT;
+
+         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
+      }
+   }
+   else {
+      /* exactly six dwords are written below */
+      assert(payload_len == 6);
+
+      payload[0] = 0;
+      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
+      payload[2] = 0;
+      payload[3] = 0;
+      payload[4] = 0;
+      payload[5] = 0;
+   }
}
/**
*/
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
- const struct pipe_rasterizer_state *rasterizer,
- const struct ilo_shader *fs,
- const struct ilo_shader *last_sh,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ const struct ilo_shader_state *last_sh,
uint32_t *dw, int num_dwords)
{
- uint32_t point_sprite_enable, const_interp_enable;
- uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
- int vue_offset, vue_len;
- int dst, max_src, i;
+ int output_count, vue_offset, vue_len;
+ const struct ilo_kernel_routing *routing;
ILO_GPE_VALID_GEN(dev, 6, 7);
assert(num_dwords == 13);
if (!fs) {
+ memset(dw, 0, sizeof(dw[0]) * num_dwords);
+
if (dev->gen >= ILO_GEN(7))
dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
else
dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
- for (i = 1; i < num_dwords; i++)
- dw[i] = 0;
-
return;
}
- if (last_sh) {
- /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
- assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
- assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
- vue_offset = 2;
- vue_len = last_sh->out.count - vue_offset;
- }
- else {
- vue_offset = 0;
- vue_len = fs->in.count;
- }
-
- point_sprite_enable = 0;
- const_interp_enable = 0;
- max_src = (last_sh) ? 0 : fs->in.count - 1;
-
- for (dst = 0; dst < fs->in.count; dst++) {
- const int semantic = fs->in.semantic_names[dst];
- const int index = fs->in.semantic_indices[dst];
- const int interp = fs->in.interp[dst];
- int src;
- uint16_t ctrl;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 268:
- *
- * "This field (Point Sprite Texture Coordinate Enable) must be
- * programmed to 0 when non-point primitives are rendered."
- *
- * TODO We do not check that yet.
- */
- if (semantic == TGSI_SEMANTIC_GENERIC &&
- (rasterizer->sprite_coord_enable & (1 << index)))
- point_sprite_enable |= 1 << dst;
-
- if (interp == TGSI_INTERPOLATE_CONSTANT ||
- (interp == TGSI_INTERPOLATE_COLOR && rasterizer->flatshade))
- const_interp_enable |= 1 << dst;
-
- if (!last_sh) {
- attr_ctrl[dst] = 0;
- continue;
- }
-
- /* find the matching VS/GS OUT for FS IN[i] */
- ctrl = 0;
- for (src = 0; src < vue_len; src++) {
- if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
- last_sh->out.semantic_indices[vue_offset + src] != index)
- continue;
-
- ctrl = src;
-
- if (semantic == TGSI_SEMANTIC_COLOR && rasterizer->light_twoside &&
- src < vue_len - 1) {
- const int next = src + 1;
-
- if (last_sh->out.semantic_names[vue_offset + next] ==
- TGSI_SEMANTIC_BCOLOR &&
- last_sh->out.semantic_indices[vue_offset + next] == index) {
- ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
- ATTRIBUTE_SWIZZLE_SHIFT;
- src++;
- }
- }
-
- break;
- }
-
- /* if there is no COLOR, try BCOLOR */
- if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
- for (src = 0; src < vue_len; src++) {
- if (last_sh->out.semantic_names[vue_offset + src] !=
- TGSI_SEMANTIC_BCOLOR ||
- last_sh->out.semantic_indices[vue_offset + src] != index)
- continue;
-
- ctrl = src;
- break;
- }
- }
-
- if (src < vue_len) {
- attr_ctrl[dst] = ctrl;
- if (max_src < src)
- max_src = src;
- }
- else {
- /*
- * The previous shader stage does not output this attribute. The
- * value is supposed to be undefined for fs, unless the attribute
- * goes through point sprite replacement or the attribute is
- * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
- * attribute is picked.
- *
- * We should update the fs code and omit the output of
- * TGSI_SEMANTIC_POSITION here.
- */
- attr_ctrl[dst] = 0;
- }
- }
+ output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+ assert(output_count <= 32);
- for (; dst < Elements(attr_ctrl); dst++)
- attr_ctrl[dst] = 0;
+ routing = ilo_shader_get_kernel_routing(fs);
- /* only the first 16 attributes can be remapped */
- for (dst = 16; dst < Elements(attr_ctrl); dst++)
- assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
- * 0 indicating no Vertex URB data to be read.
- *
- * This field should be set to the minimum length required to read the
- * maximum source attribute. The maximum source attribute is indicated
- * by the maximum value of the enabled Attribute # Source Attribute if
- * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
- * enable is not set.
- *
- * read_length = ceiling((max_source_attr+1)/2)
- *
- * [errata] Corruption/Hang possible if length programmed larger than
- * recommended"
- */
- vue_len = max_src + 1;
-
- assert(fs->in.count <= 32);
+ vue_offset = routing->source_skip;
assert(vue_offset % 2 == 0);
+ vue_offset /= 2;
- if (dev->gen >= ILO_GEN(7)) {
- dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
- (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
- vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+ vue_len = (routing->source_len + 1) / 2;
+ if (!vue_len)
+ vue_len = 1;
- if (last_sh)
+ if (dev->gen >= ILO_GEN(7)) {
+ dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
+ vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+ vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+ if (routing->swizzle_enable)
dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
}
else {
- dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
- (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
-
- if (last_sh)
+ dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
+ vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+ vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ if (routing->swizzle_enable)
dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
}
- switch (rasterizer->sprite_coord_mode) {
+ switch (rasterizer->state.sprite_coord_mode) {
case PIPE_SPRITE_COORD_UPPER_LEFT:
dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
break;
break;
}
- for (i = 0; i < 8; i++)
- dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
+ STATIC_ASSERT(Elements(routing->swizzles) >= 16);
+ memcpy(&dw[1], routing->swizzles, 2 * 16);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+ *
+ * "This field (Point Sprite Texture Coordinate Enable) must be
+ * programmed to 0 when non-point primitives are rendered."
+ *
+ * TODO We do not check that yet.
+ */
+ dw[9] = routing->point_sprite_enable;
- dw[9] = point_sprite_enable;
- dw[10] = const_interp_enable;
+ dw[10] = routing->const_interp_enable;
/* WrapShortest enables */
dw[11] = 0;
static void
gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
- const struct pipe_rasterizer_state *rasterizer,
- const struct ilo_shader *fs,
- const struct ilo_shader *last_sh,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ const struct ilo_shader_state *last_sh,
struct ilo_cp *cp)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
const uint8_t cmd_len = 20;
- uint32_t dw_raster[6], dw_sbe[13];
+ uint32_t payload_raster[6], payload_sbe[13]; /* 6 raster + 13 SBE dwords; together with the header dword they make up cmd_len */
ILO_GPE_VALID_GEN(dev, 6, 6);
ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
- 1, PIPE_FORMAT_NONE, false, dw_raster, Elements(dw_raster));
+ 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
- fs, last_sh, dw_sbe, Elements(dw_sbe));
+ fs, last_sh, payload_sbe, Elements(payload_sbe));
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw_sbe[0]);
- ilo_cp_write_multi(cp, dw_raster, 6);
- ilo_cp_write_multi(cp, &dw_sbe[1], 12);
+ ilo_cp_write(cp, payload_sbe[0]); /* the first SBE dword is emitted ahead of the raster dwords */
+ ilo_cp_write_multi(cp, payload_raster, 6); /* all six raster dwords */
+ ilo_cp_write_multi(cp, &payload_sbe[1], 12); /* the remaining twelve SBE dwords */
ilo_cp_end(cp);
}
-static void
-gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
- const struct ilo_shader *fs,
- int num_samplers,
- const struct pipe_rasterizer_state *rasterizer,
- bool dual_blend, bool cc_may_kill,
- struct ilo_cp *cp)
+void
+ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
+ const struct pipe_rasterizer_state *state,
+ struct ilo_rasterizer_wm *wm)
{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
- const uint8_t cmd_len = 9;
- const int num_samples = 1;
+ uint32_t dw5, dw6; /* rasterizer-derived bits, saved as wm->payload[0] and wm->payload[1] */
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ /*
+ * only the FF unit states are set, as in GEN7
+ *
+ * The two dwords built here are stored in wm->payload[] and are OR'ed
+ * into DW5 and DW6 of 3DSTATE_WM by gen6_emit_3DSTATE_WM(); nothing is
+ * written to a command parser in this function.
+ */
+
+ dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
+
+ /* same value as in 3DSTATE_SF */
+ if (state->line_smooth)
+ dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
+
+ if (state->poly_stipple_enable)
+ dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
+ if (state->line_stipple_enable)
+ dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+
+ dw6 = GEN6_WM_POSITION_ZW_PIXEL |
+ GEN6_WM_MSRAST_OFF_PIXEL |
+ GEN6_WM_MSDISPMODE_PERSAMPLE;
+
+ if (state->bottom_edge_rule)
+ dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
+
+ /*
+ * assertion that makes sure
+ *
+ * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
+ *
+ * is valid
+ *
+ * (the single-sample defaults OR'ed into dw6 above must be zero bits so
+ * that the multisample values below can be OR'ed on top of them)
+ */
+ STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
+ GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
+
+ wm->dw_msaa_rast =
+ (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0; /* kept out of payload[1]; the emitter adds it only when num_samples > 1 */
+ wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL; /* likewise applied by the emitter only when num_samples > 1 */
+
+ STATIC_ASSERT(Elements(wm->payload) >= 2);
+ wm->payload[0] = dw5;
+ wm->payload[1] = dw6;
+}
+
+void
+ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ struct ilo_shader_cso *cso)
+{
+ int start_grf, input_count, interps, max_threads;
uint32_t dw2, dw4, dw5, dw6;
- int max_threads;
ILO_GPE_VALID_GEN(dev, 6, 6);
+ start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+ input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+ interps = ilo_shader_get_kernel_param(fs,
+ ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+
/* see brwCreateContext() */
max_threads = (dev->gt == 2) ? 80 : 40;
- if (!fs) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- /* honor the valid range even if dispatching is disabled */
- ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
+ dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
- return;
- }
-
- dw2 = (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
- if (false)
- dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
-
- dw4 = fs->in.start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
+ dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
- if (true) {
- dw4 |= GEN6_WM_STATISTICS_ENABLE;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) must be disabled if either of these
- * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
- * Resolve Enable or Depth Buffer Resolve Enable."
- */
- dw4 |= GEN6_WM_DEPTH_CLEAR;
- dw4 |= GEN6_WM_DEPTH_RESOLVE;
- dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
- }
-
- dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT |
- GEN6_WM_LINE_AA_WIDTH_2_0;
+ dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 275:
* therefore not via PS instructions, there should be no need to
* ENABLE this bit due to ClipDistance clipping."
*/
- if (fs->has_kill || cc_may_kill)
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
dw5 |= GEN6_WM_KILL_ENABLE;
/*
*
* TODO This is not checked yet.
*/
- if (fs->out.has_pos)
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
dw5 |= GEN6_WM_COMPUTED_DEPTH;
- if (fs->in.has_pos)
- dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+ dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
+
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+ dw5 |= GEN6_WM_USES_SOURCE_W;
/*
- * Set this bit if
+ * TODO set this bit only when
*
* a) fs writes colors and color is not masked, or
* b) fs writes depth, or
if (true)
dw5 |= GEN6_WM_DISPATCH_ENABLE;
- /* same value as in 3DSTATE_SF */
- if (rasterizer->line_smooth)
- dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
+ assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+ dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
- if (rasterizer->poly_stipple_enable)
- dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
- if (rasterizer->line_stipple_enable)
- dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+ dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
+ GEN6_WM_POSOFFSET_NONE |
+ interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
- if (dual_blend)
- dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
+ STATIC_ASSERT(Elements(cso->payload) >= 4);
+ cso->payload[0] = dw2;
+ cso->payload[1] = dw4;
+ cso->payload[2] = dw5;
+ cso->payload[3] = dw6;
+}
- if (fs->dispatch_16)
- dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
- else
- dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+static void
+gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ int num_samplers,
+ const struct ilo_rasterizer_state *rasterizer,
+ bool dual_blend, bool cc_may_kill,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
+ const uint8_t cmd_len = 9;
+ const int num_samples = 1;
+ const struct ilo_shader_cso *fs_cso;
+ uint32_t dw2, dw4, dw5, dw6;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ if (!fs) {
+ /* see brwCreateContext() */
+ const int max_threads = (dev->gt == 2) ? 80 : 40;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ /* honor the valid range even if dispatching is disabled */
+ ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+
+ return;
+ }
+
+ fs_cso = ilo_shader_get_kernel_cso(fs);
+ dw2 = fs_cso->payload[0];
+ dw4 = fs_cso->payload[1];
+ dw5 = fs_cso->payload[2];
+ dw6 = fs_cso->payload[3];
+
+ dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+ if (true) {
+ dw4 |= GEN6_WM_STATISTICS_ENABLE;
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+ *
+ * "This bit (Statistics Enable) must be disabled if either of these
+ * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
+ * Resolve Enable or Depth Buffer Resolve Enable."
+ */
+ dw4 |= GEN6_WM_DEPTH_CLEAR;
+ dw4 |= GEN6_WM_DEPTH_RESOLVE;
+ dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+ }
+
+ if (cc_may_kill) {
+ dw5 |= GEN6_WM_KILL_ENABLE |
+ GEN6_WM_DISPATCH_ENABLE;
+ }
+
+ if (dual_blend)
+ dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
- dw6 = fs->in.count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
- GEN6_WM_POSOFFSET_NONE |
- GEN6_WM_POSITION_ZW_PIXEL |
- fs->in.barycentric_interpolation_mode <<
- GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
+ dw5 |= rasterizer->wm.payload[0];
- if (rasterizer->bottom_edge_rule)
- dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
+ dw6 |= rasterizer->wm.payload[1];
if (num_samples > 1) {
- if (rasterizer->multisample)
- dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
- else
- dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
- dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
- }
- else {
- dw6 |= GEN6_WM_MSRAST_OFF_PIXEL |
- GEN6_WM_MSDISPMODE_PERSAMPLE;
+ dw6 |= rasterizer->wm.dw_msaa_rast |
+ rasterizer->wm.dw_msaa_disp;
}
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, fs->cache_offset);
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
ilo_cp_write(cp, dw2);
ilo_cp_write(cp, 0); /* scratch */
ilo_cp_write(cp, dw4);
ilo_cp_end(cp);
}
-static int
-gen6_get_depth_buffer_format(const struct ilo_dev_info *dev,
- enum pipe_format format,
- bool hiz,
- bool separate_stencil,
- bool *has_depth,
- bool *has_stencil)
+struct ilo_zs_surface_info { /* intermediate description of a depth/stencil surface: filled by zs_init_info() or zs_init_info_null(), consumed by ilo_gpe_init_zs_surface() */
+ int surface_type; /* a BRW_SURFACE_* value */
+ int format; /* a BRW_DEPTHFORMAT_* value */
+
+ struct {
+ struct intel_bo *bo; /* NULL when this buffer is absent */
+ unsigned stride; /* encoded as (stride - 1) by ilo_gpe_init_zs_surface() */
+ enum intel_tiling_mode tiling;
+ uint32_t offset; /* slice offset within bo; left at 0 unless zs_init_info() rebases to the slice */
+ } zs, stencil, hiz; /* depth, separate stencil and HiZ buffers respectively */
+
+ unsigned width, height, depth; /* each is encoded as (value - 1) by ilo_gpe_init_zs_surface() */
+ unsigned lod, first_layer, num_layers;
+ uint32_t x_offset, y_offset; /* packed as y_offset << 16 | x_offset into one dword */
+};
+
+static void
+zs_init_info_null(const struct ilo_dev_info *dev,
+ struct ilo_zs_surface_info *info)
+{ /*
+ * Describe a null depth/stencil surface: no buffers at all, and every
+ * dimension set to 1.
+ */
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ memset(info, 0, sizeof(*info)); /* leaves zs/stencil/hiz bo pointers NULL and all offsets, lod and first_layer at 0 */
+
+ info->surface_type = BRW_SURFACE_NULL;
+ info->format = BRW_DEPTHFORMAT_D32_FLOAT;
+ info->width = 1; /* sizes of 1 make the (value - 1) fields computed in ilo_gpe_init_zs_surface() come out as 0 */
+ info->height = 1;
+ info->depth = 1;
+ info->num_layers = 1;
+}
+
+static void
+zs_init_info(const struct ilo_dev_info *dev,
+ const struct ilo_texture *tex,
+ enum pipe_format format,
+ unsigned level,
+ unsigned first_layer, unsigned num_layers,
+ struct ilo_zs_surface_info *info)
{
- int depth_format;
+ const bool rebase_layer = true;
+ struct intel_bo * const hiz_bo = NULL;
+ bool separate_stencil;
+ uint32_t x_offset[3], y_offset[3];
ILO_GPE_VALID_GEN(dev, 6, 7);
- *has_depth = true;
- *has_stencil = false;
+ memset(info, 0, sizeof(*info));
+
+ info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+
+ if (info->surface_type == BRW_SURFACE_CUBE) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
+ *
+ * "For Other Surfaces (Cube Surfaces):
+ * This field (Minimum Array Element) is ignored."
+ *
+ * "For Other Surfaces (Cube Surfaces):
+ * This field (Render Target View Extent) is ignored."
+ *
+ * As such, we cannot set first_layer and num_layers on cube surfaces.
+ * To work around that, treat it as a 2D surface.
+ */
+ info->surface_type = BRW_SURFACE_2D;
+ }
+
+ if (dev->gen >= ILO_GEN(7)) {
+ separate_stencil = true;
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+ *
+ * "This field (Separate Stencil Buffer Enable) must be set to the
+ * same value (enabled or disabled) as Hierarchical Depth Buffer
+ * Enable."
+ */
+ separate_stencil = (hiz_bo != NULL);
+ }
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 317:
* is indeed used, the depth values output by the fragment shaders will
* be different when read back.
*
- * As for GEN7+, separate_stencil_buffer is always true.
+ * As for GEN7+, separate_stencil is always true.
*/
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- depth_format = BRW_DEPTHFORMAT_D16_UNORM;
+ info->format = BRW_DEPTHFORMAT_D16_UNORM;
break;
case PIPE_FORMAT_Z32_FLOAT:
- depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
+ info->format = BRW_DEPTHFORMAT_D32_FLOAT;
break;
case PIPE_FORMAT_Z24X8_UNORM:
- depth_format = (separate_stencil) ?
- BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
- BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
- break;
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- depth_format = (separate_stencil) ?
+ info->format = (separate_stencil) ?
BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
- *has_stencil = true;
break;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- depth_format = (separate_stencil) ?
+ info->format = (separate_stencil) ?
BRW_DEPTHFORMAT_D32_FLOAT :
BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
- *has_stencil = true;
break;
case PIPE_FORMAT_S8_UINT:
if (separate_stencil) {
- depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
- *has_depth = false;
- *has_stencil = true;
+ info->format = BRW_DEPTHFORMAT_D32_FLOAT;
break;
}
/* fall through */
default:
assert(!"unsupported depth/stencil format");
- depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
- *has_depth = false;
- *has_stencil = false;
+ zs_init_info_null(dev, info);
+ return;
break;
}
- return depth_format;
-}
+ if (format != PIPE_FORMAT_S8_UINT) {
+ info->zs.bo = tex->bo;
+ info->zs.stride = tex->bo_stride;
+ info->zs.tiling = tex->tiling;
-static void
-gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
- const struct pipe_surface *surface,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
- const uint8_t cmd_len = 7;
- const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
- const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
- const bool hiz = false;
- struct ilo_texture *tex;
- uint32_t dw1, dw3, dw4, dw6;
- uint32_t slice_offset, x_offset, y_offset;
- int surface_type, depth_format;
- unsigned lod, first_layer, num_layers;
- unsigned width, height, depth;
- bool separate_stencil, has_depth, has_stencil;
+ if (rebase_layer) {
+ info->zs.offset = ilo_texture_get_slice_offset(tex,
+ level, first_layer, &x_offset[0], &y_offset[0]);
+ }
+ }
- ILO_GPE_VALID_GEN(dev, 6, 7);
+ if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
+ const struct ilo_texture *s8_tex =
+ (tex->separate_s8) ? tex->separate_s8 : tex;
+
+ info->stencil.bo = s8_tex->bo;
- if (dev->gen >= ILO_GEN(7)) {
- separate_stencil = true;
- }
- else {
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+ * From the Sandy Bridge PRM, volume 2 part 1, page 329:
*
- * "This field (Separate Stencil Buffer Enable) must be set to the
- * same value (enabled or disabled) as Hierarchical Depth Buffer
- * Enable."
+ * "The pitch must be set to 2x the value computed based on width,
+ * as the stencil buffer is stored with two rows interleaved."
+ *
+ * According to the classic driver, we need to do the same for GEN7+
+ * even though the Ivy Bridge PRM does not say anything about it.
*/
- separate_stencil = hiz;
- }
+ info->stencil.stride = s8_tex->bo_stride * 2;
- if (surface) {
- depth_format = gen6_get_depth_buffer_format(dev,
- surface->format, hiz, separate_stencil, &has_depth, &has_stencil);
- }
- else {
- has_depth = false;
- has_stencil = false;
- }
-
- if (!has_depth && !has_stencil) {
- dw1 = BRW_SURFACE_NULL << 29 |
- BRW_DEPTHFORMAT_D32_FLOAT << 18;
+ info->stencil.tiling = s8_tex->tiling;
- /* Y-tiled */
- if (dev->gen == ILO_GEN(6)) {
- dw1 |= 1 << 27 |
- 1 << 26;
+ if (rebase_layer) {
+ info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
+ level, first_layer, &x_offset[1], &y_offset[1]);
}
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
}
- tex = ilo_texture(surface->texture);
-
- surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
- lod = surface->u.tex.level;
- first_layer = surface->u.tex.first_layer;
- num_layers = surface->u.tex.last_layer - first_layer + 1;
+ if (hiz_bo) {
+ info->hiz.bo = hiz_bo;
+ info->hiz.stride = 0;
+ info->hiz.tiling = 0;
+ info->hiz.offset = 0;
+ x_offset[2] = 0;
+ y_offset[2] = 0;
+ }
- width = tex->base.width0;
- height = tex->base.height0;
- depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+ info->width = tex->base.width0;
+ info->height = tex->base.height0;
+ info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
tex->base.depth0 : num_layers;
- if (surface_type == BRW_SURFACE_CUBE) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
- *
- * "For Other Surfaces (Cube Surfaces):
- * This field (Minimum Array Element) is ignored."
- *
- * "For Other Surfaces (Cube Surfaces):
- * This field (Render Target View Extent) is ignored."
- *
- * As such, we cannot set first_layer and num_layers on cube surfaces.
- * To work around that, treat it as a 2D surface.
- */
- surface_type = BRW_SURFACE_2D;
- }
+ info->lod = level;
+ info->first_layer = first_layer;
+ info->num_layers = num_layers;
+
+ if (rebase_layer) {
+ /* the size of the layer */
+ info->width = u_minify(info->width, level);
+ info->height = u_minify(info->height, level);
+ if (info->surface_type == BRW_SURFACE_3D)
+ info->depth = u_minify(info->depth, level);
+ else
+ info->depth = 1;
- /*
- * we always treat the resource as non-mipmapped and set the slice/x/y
- * offsets manually
- */
- if (true) {
/* no layered rendering */
assert(num_layers == 1);
- slice_offset = ilo_texture_get_slice_offset(tex,
- lod, first_layer, &x_offset, &y_offset);
+ info->lod = 0;
+ info->first_layer = 0;
+ info->num_layers = 1;
+
+ /* all three share the same X/Y offsets */
+ if (info->zs.bo) {
+ if (info->stencil.bo) {
+ assert(x_offset[0] == x_offset[1]);
+ assert(y_offset[0] == y_offset[1]);
+ }
+
+ info->x_offset = x_offset[0];
+ info->y_offset = y_offset[0];
+ }
+ else {
+ assert(info->stencil.bo);
+
+ info->x_offset = x_offset[1];
+ info->y_offset = y_offset[1];
+ }
+
+ if (info->hiz.bo) {
+ assert(info->x_offset == x_offset[2]);
+ assert(info->y_offset == y_offset[2]);
+ }
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 326:
* sure that does not happen eventually.
*/
if (dev->gen >= ILO_GEN(7)) {
- assert((x_offset & 7) == 0 && (y_offset & 7) == 0);
- x_offset &= ~7;
- y_offset &= ~7;
+ assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
+ info->x_offset &= ~7;
+ info->y_offset &= ~7;
}
- /* the size of the layer */
- width = u_minify(width, lod);
- height = u_minify(height, lod);
- if (surface_type == BRW_SURFACE_3D)
- depth = u_minify(depth, lod);
- else
- depth = 1;
-
- lod = 0;
- first_layer = 0;
-
- width += x_offset;
- height += y_offset;
+ info->width += info->x_offset;
+ info->height += info->y_offset;
/* we have to treat them as 2D surfaces */
- if (surface_type == BRW_SURFACE_CUBE) {
+ if (info->surface_type == BRW_SURFACE_CUBE) {
assert(tex->base.width0 == tex->base.height0);
/* we will set slice_offset to point to the single face */
- surface_type = BRW_SURFACE_2D;
+ info->surface_type = BRW_SURFACE_2D;
}
- else if (surface_type == BRW_SURFACE_1D && height > 1) {
+ else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
assert(tex->base.height0 == 1);
- surface_type = BRW_SURFACE_2D;
+ info->surface_type = BRW_SURFACE_2D;
}
}
- else {
- slice_offset = 0;
- x_offset = 0;
- y_offset = 0;
- }
+}
+
+void
+ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
+ const struct ilo_texture *tex,
+ enum pipe_format format,
+ unsigned level,
+ unsigned first_layer, unsigned num_layers,
+ struct ilo_zs_surface *zs)
+{ /*
+ * Precompute the depth/stencil/HiZ buffer state for a view of tex (or a
+ * null surface when tex is NULL) and store it in zs:
+ *
+ * payload[0..5] dwords emitted by gen6_emit_3DSTATE_DEPTH_BUFFER()
+ * payload[6..7] dwords emitted by gen6_emit_3DSTATE_STENCIL_BUFFER()
+ * payload[8..9] dwords emitted by gen6_emit_3DSTATE_HIER_DEPTH_BUFFER()
+ *
+ * The bo pointers saved in zs are borrowed; no reference is taken.
+ */
+ const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
+ const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
+ struct ilo_zs_surface_info info; /* fully initialized by one of the zs_init_info*() calls below */
+ uint32_t dw1, dw2, dw3, dw4, dw5, dw6; /* become zs->payload[0..5] */
- /* required for GEN6+ */
- assert(tex->tiling == INTEL_TILING_Y);
- assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
- tex->bo_stride % 128 == 0);
- assert(width <= tex->bo_stride);
+ ILO_GPE_VALID_GEN(dev, 6, 7);
- switch (surface_type) {
+ if (tex)
+ zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
+ else
+ zs_init_info_null(dev, &info); /* no texture: describe a null surface */
+
+ switch (info.surface_type) { /* debug-only sanity checks; nothing is modified in this switch */
+ case BRW_SURFACE_NULL:
+ break;
case BRW_SURFACE_1D:
- assert(width <= max_2d_size && height == 1 &&
- depth <= max_array_size);
- assert(first_layer < max_array_size - 1 &&
- num_layers <= max_array_size);
+ assert(info.width <= max_2d_size && info.height == 1 &&
+ info.depth <= max_array_size);
+ assert(info.first_layer < max_array_size - 1 &&
+ info.num_layers <= max_array_size);
break;
case BRW_SURFACE_2D:
- assert(width <= max_2d_size && height <= max_2d_size &&
- depth <= max_array_size);
- assert(first_layer < max_array_size - 1 &&
- num_layers <= max_array_size);
+ assert(info.width <= max_2d_size && info.height <= max_2d_size &&
+ info.depth <= max_array_size);
+ assert(info.first_layer < max_array_size - 1 &&
+ info.num_layers <= max_array_size);
break;
case BRW_SURFACE_3D:
- assert(width <= 2048 && height <= 2048 && depth <= 2048);
- assert(first_layer < 2048 && num_layers <= max_array_size);
- assert(x_offset == 0 && y_offset == 0);
+ assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
+ assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
+ assert(info.x_offset == 0 && info.y_offset == 0);
break;
case BRW_SURFACE_CUBE:
- assert(width <= max_2d_size && height <= max_2d_size && depth == 1);
- assert(first_layer == 0 && num_layers == 1);
- assert(width == height);
- assert(x_offset == 0 && y_offset == 0);
+ assert(info.width <= max_2d_size && info.height <= max_2d_size &&
+ info.depth == 1);
+ assert(info.first_layer == 0 && info.num_layers == 1);
+ assert(info.width == info.height);
+ assert(info.x_offset == 0 && info.y_offset == 0);
break;
default:
assert(!"unexpected depth surface type");
break;
}
- dw1 = surface_type << 29 |
- depth_format << 18 |
- (tex->bo_stride - 1);
+ dw1 = info.surface_type << 29 |
+ info.format << 18;
+
+ if (info.zs.bo) {
+ /* required for GEN6+ */
+ assert(info.zs.tiling == INTEL_TILING_Y);
+ assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
+ info.zs.stride % 128 == 0);
+ assert(info.width <= info.zs.stride);
+
+ dw1 |= (info.zs.stride - 1); /* the pitch is only encoded when a depth bo exists */
+ dw2 = info.zs.offset;
+ }
+ else {
+ dw2 = 0;
+ }
if (dev->gen >= ILO_GEN(7)) {
- if (has_depth)
+ if (info.zs.bo)
dw1 |= 1 << 28;
- if (has_stencil)
+ if (info.stencil.bo)
dw1 |= 1 << 27;
- if (hiz)
+ if (info.hiz.bo)
dw1 |= 1 << 22;
- dw3 = (height - 1) << 18 |
- (width - 1) << 4 |
- lod;
+ dw3 = (info.height - 1) << 18 |
+ (info.width - 1) << 4 |
+ info.lod;
+
+ dw4 = (info.depth - 1) << 21 |
+ info.first_layer << 10;
- dw4 = (depth - 1) << 21 |
- first_layer << 10;
+ dw5 = info.y_offset << 16 | info.x_offset;
- dw6 = (num_layers - 1) << 21;
+ dw6 = (info.num_layers - 1) << 21;
}
else {
- dw1 |= (tex->tiling != INTEL_TILING_NONE) << 27 |
- (tex->tiling == INTEL_TILING_Y) << 26;
+ /* always Y-tiled */
+ dw1 |= 1 << 27 |
+ 1 << 26;
- if (hiz) {
+ if (info.hiz.bo) {
dw1 |= 1 << 22 |
1 << 21;
}
- dw3 = (height - 1) << 19 |
- (width - 1) << 6 |
- lod << 2 |
+ dw3 = (info.height - 1) << 19 |
+ (info.width - 1) << 6 |
+ info.lod << 2 |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
- dw4 = (depth - 1) << 21 |
- first_layer << 10 |
- (num_layers - 1) << 1;
+ dw4 = (info.depth - 1) << 21 |
+ info.first_layer << 10 |
+ (info.num_layers - 1) << 1;
+
+ dw5 = info.y_offset << 16 | info.x_offset;
dw6 = 0;
}
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
+ STATIC_ASSERT(Elements(zs->payload) >= 10);
+
+ zs->payload[0] = dw1;
+ zs->payload[1] = dw2;
+ zs->payload[2] = dw3;
+ zs->payload[3] = dw4;
+ zs->payload[4] = dw5;
+ zs->payload[5] = dw6;
+
+ /* do not increment reference count */
+ zs->bo = info.zs.bo; /* NULL for a null surface and for a stencil-only (PIPE_FORMAT_S8_UINT) view */
+
+ /* separate stencil */
+ if (info.stencil.bo) {
+ assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
+ info.stencil.stride % 128 == 0);
- if (has_depth) {
- ilo_cp_write_bo(cp, slice_offset, tex->bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+ zs->payload[6] = info.stencil.stride - 1; /* zs_init_info() has already doubled the stride */
+ zs->payload[7] = info.stencil.offset;
+
+ /* do not increment reference count */
+ zs->separate_s8_bo = info.stencil.bo;
}
else {
- ilo_cp_write(cp, 0);
+ zs->payload[6] = 0;
+ zs->payload[7] = 0;
+ zs->separate_s8_bo = NULL;
}
- ilo_cp_write(cp, dw3);
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, y_offset << 16 | x_offset);
- ilo_cp_write(cp, dw6);
+ /* hiz */
+ if (info.hiz.bo) {
+ zs->payload[8] = info.hiz.stride - 1; /* NOTE(review): zs_init_info() currently hard-codes hiz_bo to NULL and hiz.stride to 0, so this branch is not taken today; a stride of 0 would wrap here if HiZ were enabled without setting it */
+ zs->payload[9] = info.hiz.offset;
+
+ /* do not increment reference count */
+ zs->hiz_bo = info.hiz.bo;
+ }
+ else {
+ zs->payload[8] = 0;
+ zs->payload[9] = 0;
+ zs->hiz_bo = NULL;
+ }
+}
+
+static void
+gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+ const struct ilo_zs_surface *zs,
+ struct ilo_cp *cp)
+{ /*
+ * Emit 3DSTATE_DEPTH_BUFFER from the dwords precomputed by
+ * ilo_gpe_init_zs_surface(); the command opcode differs between GEN6
+ * and GEN7.
+ */
+ const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
+ ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
+ const uint8_t cmd_len = 7; /* header dword + zs->payload[0..5] */
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, zs->payload[0]);
+ ilo_cp_write_bo(cp, zs->payload[1], zs->bo, /* NOTE(review): zs->bo is NULL for null and stencil-only surfaces; presumably ilo_cp_write_bo() accepts a NULL bo - confirm */
+ INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+ ilo_cp_write(cp, zs->payload[2]);
+ ilo_cp_write(cp, zs->payload[3]);
+ ilo_cp_write(cp, zs->payload[4]);
+ ilo_cp_write(cp, zs->payload[5]);
ilo_cp_end(cp);
}
static void
gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
- const struct pipe_surface *surface,
+ const struct ilo_zs_surface *zs, /* state precomputed by ilo_gpe_init_zs_surface() */
struct ilo_cp *cp)
{
const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
ILO_GPE_CMD(0x3, 0x0, 0x06) :
ILO_GPE_CMD(0x3, 0x1, 0x0e);
const uint8_t cmd_len = 3;
- struct ilo_texture *tex;
- uint32_t slice_offset, x_offset, y_offset;
- int pitch;
ILO_GPE_VALID_GEN(dev, 6, 7);
- tex = (surface) ? ilo_texture(surface->texture) : NULL;
- if (tex && surface->format != PIPE_FORMAT_S8_UINT)
- tex = tex->separate_s8;
-
- if (!tex) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
- if (true) {
- slice_offset = ilo_texture_get_slice_offset(tex,
- surface->u.tex.level, surface->u.tex.first_layer,
- &x_offset, &y_offset);
- /* XXX X/Y offsets inherit from 3DSTATE_DEPTH_BUFFER */
- }
- else {
- slice_offset = 0;
- x_offset = 0;
- y_offset = 0;
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 329:
- *
- * "The pitch must be set to 2x the value computed based on width, as
- * the stencil buffer is stored with two rows interleaved."
- *
- * According to the classic driver, we need to do the same for GEN7+ even
- * though the Ivy Bridge PRM does not say anything about it.
- */
- pitch = 2 * tex->bo_stride;
- assert(pitch > 0 && pitch < 128 * 1024 && pitch % 128 == 0);
-
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, pitch - 1);
- ilo_cp_write_bo(cp, slice_offset, tex->bo,
+ /*
+ * see ilo_gpe_init_zs_surface()
+ *
+ * payload[6] holds the (already doubled) stencil pitch minus one and
+ * payload[7] the offset into separate_s8_bo. When the surface has no
+ * separate stencil buffer both dwords are 0 and separate_s8_bo is NULL,
+ * so the same two writes serve both cases.
+ */
+ ilo_cp_write(cp, zs->payload[6]);
+ ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
ilo_cp_end(cp);
}
static void
gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
- const struct pipe_surface *surface,
+ const struct ilo_zs_surface *zs, /* state precomputed by ilo_gpe_init_zs_surface() */
struct ilo_cp *cp)
{
const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
ILO_GPE_CMD(0x3, 0x0, 0x07) :
ILO_GPE_CMD(0x3, 0x1, 0x0f);
const uint8_t cmd_len = 3;
- const bool hiz = false;
- struct ilo_texture *tex;
- uint32_t slice_offset;
ILO_GPE_VALID_GEN(dev, 6, 7);
- if (!surface || !hiz) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
- tex = ilo_texture(surface->texture);
-
- /* TODO */
- slice_offset = 0;
-
- assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
- tex->bo_stride % 128 == 0);
-
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, tex->bo_stride - 1);
- ilo_cp_write_bo(cp, slice_offset, tex->bo,
+ /*
+ * see ilo_gpe_init_zs_surface()
+ *
+ * payload[8] holds the HiZ pitch minus one and payload[9] the offset
+ * into hiz_bo. When the surface has no HiZ buffer both dwords are 0 and
+ * hiz_bo is NULL, so the same two writes serve both cases.
+ */
+ ilo_cp_write(cp, zs->payload[8]);
+ ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
ilo_cp_end(cp);
}
static void
gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
const struct pipe_draw_info *info,
+ const struct ilo_ib_state *ib,
bool rectlist,
struct ilo_cp *cp)
{
const int vb_access = (info->indexed) ?
GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+ const uint32_t vb_start = info->start +
+ ((info->indexed) ? ib->draw_start_offset : 0);
ILO_GPE_VALID_GEN(dev, 6, 6);
prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
vb_access);
ilo_cp_write(cp, info->count);
- ilo_cp_write(cp, info->start);
+ ilo_cp_write(cp, vb_start);
ilo_cp_write(cp, info->instance_count);
ilo_cp_write(cp, info->start_instance);
ilo_cp_write(cp, info->index_bias);
static uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
- const struct ilo_shader **cs,
+ const struct ilo_shader_state **cs,
uint32_t *sampler_state,
int *num_samplers,
uint32_t *binding_table_state,
state_len, state_align, &state_offset);
for (i = 0; i < num_ids; i++) {
- int curbe_read_len;
-
- curbe_read_len = (cs[i]->pcb.clip_state_size + 31) / 32;
-
- dw[0] = cs[i]->cache_offset;
+ dw[0] = ilo_shader_get_kernel_offset(cs[i]);
dw[1] = 1 << 18; /* SPF */
dw[2] = sampler_state[i] |
(num_samplers[i] + 3) / 4 << 2;
dw[3] = binding_table_state[i] |
num_surfaces[i];
- dw[4] = curbe_read_len << 16 | /* CURBE Read Length */
- 0; /* CURBE Read Offset */
+ dw[4] = 0 << 16 | /* CURBE Read Length */
+ 0; /* CURBE Read Offset */
dw[5] = 0; /* Barrier ID */
dw[6] = 0;
dw[7] = 0;
max_y = 0;
}
- scissor->payload[start_slot * 2 + 0] = min_y << 16 | min_x;
- scissor->payload[start_slot * 2 + 1] = max_y << 16 | max_x;
- start_slot++;
+ scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
+ scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
}
+
+ if (!start_slot && num_states)
+ scissor->scissor0 = states[0];
}
void
return state_offset;
}
-static uint32_t
-gen6_emit_surf_SURFACE_STATE(const struct ilo_dev_info *dev,
- const struct pipe_surface *surface,
- struct ilo_cp *cp)
-{
- struct ilo_view_surface surf;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- if (surface && surface->texture) {
- struct ilo_texture *tex = ilo_texture(surface->texture);
-
- /*
- * classic i965 sets render_cache_rw for constant buffers and sol
- * surfaces but not render buffers. Why?
- */
- ilo_gpe_init_view_surface_for_texture_gen6(dev, tex, surface->format,
- surface->u.tex.level, 1,
- surface->u.tex.first_layer,
- surface->u.tex.last_layer - surface->u.tex.first_layer + 1,
- true, true, &surf);
- }
- else {
- ilo_gpe_init_view_surface_null_gen6(dev,
- surface->width, surface->height, 1, 0, &surf);
- }
-
- return gen6_emit_SURFACE_STATE(dev, &surf, true, cp);
-}
-
-static uint32_t
-gen6_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info *dev,
- const struct pipe_constant_buffer *cbuf,
- struct ilo_cp *cp)
-{
- const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- struct ilo_buffer *buf = ilo_buffer(cbuf->buffer);
- struct ilo_view_surface surf;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf,
- cbuf->buffer_offset, cbuf->buffer_size,
- util_format_get_blocksize(elem_format), elem_format,
- false, false, &surf);
-
- return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
-}
-
static uint32_t
gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
const struct pipe_stream_output_target *so,
dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
state_len, state_align, &state_offset);
+ /* see ilo_gpe_init_sampler_cso() */
memcpy(dw, &sampler->payload[3], state_len * 4);
return state_offset;
GEN6_SET(SCISSOR_RECT),
GEN6_SET(BINDING_TABLE_STATE),
GEN6_SET(SURFACE_STATE),
- GEN6_SET(surf_SURFACE_STATE),
- GEN6_SET(cbuf_SURFACE_STATE),
GEN6_SET(so_SURFACE_STATE),
GEN6_SET(SAMPLER_STATE),
GEN6_SET(SAMPLER_BORDER_COLOR_STATE),