#include "ilo_state.h"
#include "ilo_gpe_gen6.h"
-/**
- * Translate winsys tiling to hardware tiling.
- */
-int
-ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
-{
- switch (tiling) {
- case INTEL_TILING_NONE:
- return 0;
- case INTEL_TILING_X:
- return BRW_SURFACE_TILED;
- case INTEL_TILING_Y:
- return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
- default:
- assert(!"unknown tiling");
- return 0;
- }
-}
-
-/**
- * Translate a pipe primitive type to the matching hardware primitive type.
- */
-int
-ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
-{
- static const int prim_mapping[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
- [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
- [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
- [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
- [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
- [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
- [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
- [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
- [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
- [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
- [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
- [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
- [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
- };
-
- assert(prim_mapping[prim]);
-
- return prim_mapping[prim];
-}
-
-/**
- * Translate a pipe texture target to the matching hardware surface type.
- */
-int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
- switch (target) {
- case PIPE_BUFFER:
- return BRW_SURFACE_BUFFER;
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return BRW_SURFACE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- return BRW_SURFACE_2D;
- case PIPE_TEXTURE_3D:
- return BRW_SURFACE_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return BRW_SURFACE_CUBE;
- default:
- assert(!"unknown texture target");
- return BRW_SURFACE_BUFFER;
- }
-}
-
-/**
- * Translate a depth/stencil pipe format to the matching hardware
- * format. Return -1 on errors.
- */
-static int
-gen6_translate_depth_format(enum pipe_format format)
-{
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- return BRW_DEPTHFORMAT_D16_UNORM;
- case PIPE_FORMAT_Z32_FLOAT:
- return BRW_DEPTHFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
- default:
- return -1;
- }
-}
-
/**
* Translate a pipe logicop to the matching hardware logicop.
*/
}
}
-/**
- * Translate a pipe DSA test function to the matching hardware compare
- * function.
- */
-static int
-gen6_translate_dsa_func(unsigned func)
-{
- switch (func) {
- case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
- case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
- case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
- case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
- case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
- case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
- case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
- case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
- default:
- assert(!"unknown depth/stencil/alpha test function");
- return BRW_COMPAREFUNCTION_NEVER;
- }
-}
-
/**
* Translate a pipe shadow compare function to the matching hardware shadow
* function.
}
}
-/**
- * Translate an index size to the matching hardware index format.
- */
-static int
-gen6_translate_index_size(int size)
-{
- switch (size) {
- case 4: return BRW_INDEX_DWORD;
- case 2: return BRW_INDEX_WORD;
- case 1: return BRW_INDEX_BYTE;
- default:
- assert(!"unknown index size");
- return BRW_INDEX_BYTE;
- }
-}
-
-static void
-gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
- struct intel_bo *general_state_bo,
- struct intel_bo *surface_state_bo,
- struct intel_bo *dynamic_state_bo,
- struct intel_bo *indirect_object_bo,
- struct intel_bo *instruction_bo,
- uint32_t general_state_size,
- uint32_t dynamic_state_size,
- uint32_t indirect_object_size,
- uint32_t instruction_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
- const uint8_t cmd_len = 10;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /* 4K-page aligned */
- assert(((general_state_size | dynamic_state_size |
- indirect_object_size | instruction_size) & 0xfff) == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
-
- ilo_cp_write_bo(cp, 1, general_state_bo,
- INTEL_DOMAIN_RENDER,
- 0);
- ilo_cp_write_bo(cp, 1, surface_state_bo,
- INTEL_DOMAIN_SAMPLER,
- 0);
- ilo_cp_write_bo(cp, 1, dynamic_state_bo,
- INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
- 0);
- ilo_cp_write_bo(cp, 1, indirect_object_bo,
- 0,
- 0);
- ilo_cp_write_bo(cp, 1, instruction_bo,
- INTEL_DOMAIN_INSTRUCTION,
- 0);
-
- if (general_state_size) {
- ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
- INTEL_DOMAIN_RENDER,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 1);
- }
-
- if (dynamic_state_size) {
- ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
- INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 0xfffff000 + 1);
- }
-
- if (indirect_object_size) {
- ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
- 0,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 0xfffff000 + 1);
- }
-
- if (instruction_size) {
- ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
- INTEL_DOMAIN_INSTRUCTION,
- 0);
- }
- else {
- /* skip range check */
- ilo_cp_write(cp, 1);
- }
-
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
- uint32_t sip,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- ilo_cp_begin(cp, cmd_len | (cmd_len - 2));
- ilo_cp_write(cp, cmd);
- ilo_cp_write(cp, sip);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
- bool enable,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
- const uint8_t cmd_len = 1;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | enable);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
- int pipeline,
- struct ilo_cp *cp)
-{
- const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
- const uint8_t cmd_len = 1;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /* 3D or media */
- assert(pipeline == 0x0 || pipeline == 0x1);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | pipeline);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
- int max_threads, int num_urb_entries,
- int urb_entry_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
- const uint8_t cmd_len = 8;
- uint32_t dw2, dw4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw2 = (max_threads - 1) << 16 |
- num_urb_entries << 8 |
- 1 << 7 | /* Reset Gateway Timer */
- 1 << 6; /* Bypass Gateway Control */
-
- dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
- 480; /* CURBE Allocation Size */
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* MBZ */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, 0); /* scoreboard */
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
- uint32_t buf, int size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- assert(buf % 32 == 0);
- /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
- size = align(size, 32);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0); /* MBZ */
- ilo_cp_write(cp, size);
- ilo_cp_write(cp, buf);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
- uint32_t offset, int num_ids,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- assert(offset % 32 == 0);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0); /* MBZ */
- /* every ID has 8 DWords */
- ilo_cp_write(cp, num_ids * 8 * 4);
- ilo_cp_write(cp, offset);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
- int id, int byte, int thread_count,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
- const uint8_t cmd_len = 2;
- uint32_t dw1;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw1 = id << 16 |
- byte << 8 |
- thread_count;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
- int thread_count_water_mark,
- int barrier_mask,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
- const uint8_t cmd_len = 2;
- uint32_t dw1;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- dw1 = thread_count_water_mark << 16 |
- barrier_mask;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- assert(!"MEDIA_OBJECT_WALKER unsupported");
-}
-
-static void
-gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t vs_binding_table,
- uint32_t gs_binding_table,
- uint32_t ps_binding_table,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN6_BINDING_TABLE_MODIFY_VS |
- GEN6_BINDING_TABLE_MODIFY_GS |
- GEN6_BINDING_TABLE_MODIFY_PS);
- ilo_cp_write(cp, vs_binding_table);
- ilo_cp_write(cp, gs_binding_table);
- ilo_cp_write(cp, ps_binding_table);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t vs_sampler_state,
- uint32_t gs_sampler_state,
- uint32_t ps_sampler_state,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- VS_SAMPLER_STATE_CHANGE |
- GS_SAMPLER_STATE_CHANGE |
- PS_SAMPLER_STATE_CHANGE);
- ilo_cp_write(cp, vs_sampler_state);
- ilo_cp_write(cp, gs_sampler_state);
- ilo_cp_write(cp, ps_sampler_state);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
- int vs_total_size, int gs_total_size,
- int vs_entry_size, int gs_entry_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
- const uint8_t cmd_len = 3;
- const int row_size = 128; /* 1024 bits */
- int vs_alloc_size, gs_alloc_size;
- int vs_num_entries, gs_num_entries;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- /* in 1024-bit URB rows */
- vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
- gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
- /* the valid range is [1, 5] */
- if (!vs_alloc_size)
- vs_alloc_size = 1;
- if (!gs_alloc_size)
- gs_alloc_size = 1;
- assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
- /* the valid range is [24, 256] in multiples of 4 */
- vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
- if (vs_num_entries > 256)
- vs_num_entries = 256;
- assert(vs_num_entries >= 24);
-
- /* the valid range is [0, 256] in multiples of 4 */
- gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
- if (gs_num_entries > 256)
- gs_num_entries = 256;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
- vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
- ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
- (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
- const struct ilo_ve_state *ve,
- const struct ilo_vb_state *vb,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
- uint8_t cmd_len;
- unsigned hw_idx;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 82:
- *
- * "From 1 to 33 VBs can be specified..."
- */
- assert(ve->vb_count <= 33);
-
- if (!ve->vb_count)
- return;
-
- cmd_len = 1 + 4 * ve->vb_count;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
-
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- const unsigned instance_divisor = ve->instance_divisors[hw_idx];
- const unsigned pipe_idx = ve->vb_mapping[hw_idx];
- const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
- uint32_t dw;
-
- dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
-
- if (instance_divisor)
- dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
- else
- dw |= GEN6_VB0_ACCESS_VERTEXDATA;
-
- if (dev->gen >= ILO_GEN(7))
- dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
-
- /* use null vb if there is no buffer or the stride is out of range */
- if (cso->buffer && cso->stride <= 2048) {
- const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
- const uint32_t start_offset = cso->buffer_offset;
- /*
- * As noted in ilo_translate_format(), we treat some 3-component
- * formats as 4-component formats to work around hardware
- * limitations. Imagine the case where the vertex buffer holds a
- * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
- * The hardware would not be able to fetch it because the vertex
- * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
- * and that takes at least 8 bytes.
- *
- * For the workaround to work, we query the physical size, which is
- * page aligned, to calculate end_offset so that the last vertex has
- * a better chance to be fetched.
- */
- const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
-
- dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
-
- ilo_cp_write(cp, dw);
- ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_write(cp, instance_divisor);
- }
- else {
- dw |= 1 << 13;
-
- ilo_cp_write(cp, dw);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, instance_divisor);
- }
- }
-
- ilo_cp_end(cp);
-}
-
-static void
-ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
- struct ilo_ve_cso *cso)
-{
- int format;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 94:
- *
- * "- This bit (Edge Flag Enable) must only be ENABLED on the last
- * valid VERTEX_ELEMENT structure.
- *
- * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
- * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
- *
- * - The Source Element Format must be set to the UINT format.
- *
- * - [DevSNB]: Edge Flags are not supported for QUADLIST
- * primitives. Software may elect to convert QUADLIST primitives
- * to some set of corresponding edge-flag-supported primitive
- * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
- */
-
- cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
- cso->payload[1] =
- BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
- BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
- BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
- BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
-
- /*
- * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
- * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
- * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
- *
- * Since all the hardware cares about is whether the flags are zero or not,
- * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
- */
- format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
- if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
- STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
- BRW_SURFACEFORMAT_R32_FLOAT - 1);
-
- cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
- }
- else {
- assert(format == BRW_SURFACEFORMAT_R8_UINT);
- }
-}
-
-static void
-ve_init_cso_with_components(const struct ilo_dev_info *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso)
-{
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
- cso->payload[0] = GEN6_VE0_VALID;
- cso->payload[1] =
- comp0 << BRW_VE1_COMPONENT_0_SHIFT |
- comp1 << BRW_VE1_COMPONENT_1_SHIFT |
- comp2 << BRW_VE1_COMPONENT_2_SHIFT |
- comp3 << BRW_VE1_COMPONENT_3_SHIFT;
-}
-
static void
ve_init_cso(const struct ilo_dev_info *dev,
const struct pipe_vertex_element *state,
for (i = 0; i < num_states; i++) {
const unsigned pipe_idx = states[i].vertex_buffer_index;
- const unsigned instance_divisor = states[i].instance_divisor;
- unsigned hw_idx;
-
- /*
- * map the pipe vb to the hardware vb, which has a fixed instance
- * divisor
- */
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- if (ve->vb_mapping[hw_idx] == pipe_idx &&
- ve->instance_divisors[hw_idx] == instance_divisor)
- break;
- }
-
- /* create one if there is no matching hardware vb */
- if (hw_idx >= ve->vb_count) {
- hw_idx = ve->vb_count++;
-
- ve->vb_mapping[hw_idx] = pipe_idx;
- ve->instance_divisors[hw_idx] = instance_divisor;
- }
-
- ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
- }
-}
-
-static void
-gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
- const struct ilo_ve_state *ve,
- bool last_velement_edgeflag,
- bool prepend_generated_ids,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
- uint8_t cmd_len;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 93:
- *
- * "Up to 34 (DevSNB+) vertex elements are supported."
- */
- assert(ve->count + prepend_generated_ids <= 34);
-
- if (!ve->count && !prepend_generated_ids) {
- struct ilo_ve_cso dummy;
-
- ve_init_cso_with_components(dev,
- BRW_VE1_COMPONENT_STORE_0,
- BRW_VE1_COMPONENT_STORE_0,
- BRW_VE1_COMPONENT_STORE_0,
- BRW_VE1_COMPONENT_STORE_1_FLT,
- &dummy);
-
- cmd_len = 3;
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, dummy.payload, 2);
- ilo_cp_end(cp);
-
- return;
- }
-
- cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
-
- if (prepend_generated_ids) {
- struct ilo_ve_cso gen_ids;
-
- ve_init_cso_with_components(dev,
- BRW_VE1_COMPONENT_STORE_VID,
- BRW_VE1_COMPONENT_STORE_IID,
- BRW_VE1_COMPONENT_NOSTORE,
- BRW_VE1_COMPONENT_NOSTORE,
- &gen_ids);
-
- ilo_cp_write_multi(cp, gen_ids.payload, 2);
- }
-
- if (last_velement_edgeflag) {
- struct ilo_ve_cso edgeflag;
-
- for (i = 0; i < ve->count - 1; i++)
- ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
-
- edgeflag = ve->cso[i];
- ve_set_cso_edgeflag(dev, &edgeflag);
- ilo_cp_write_multi(cp, edgeflag.payload, 2);
- }
- else {
- for (i = 0; i < ve->count; i++)
- ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
- }
-
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_ib_state *ib,
- bool enable_cut_index,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
- const uint8_t cmd_len = 3;
- struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
- uint32_t start_offset, end_offset;
- int format;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- if (!buf)
- return;
-
- format = gen6_translate_index_size(ib->hw_index_size);
-
- /*
- * set start_offset to 0 here and adjust pipe_draw_info::start with
- * ib->draw_start_offset in 3DPRIMITIVE
- */
- start_offset = 0;
- end_offset = buf->bo_size;
-
- /* end_offset must also be aligned and is inclusive */
- end_offset -= (end_offset % ib->hw_index_size);
- end_offset--;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
- format << 8);
- ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t clip_viewport,
- uint32_t sf_viewport,
- uint32_t cc_viewport,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN6_CLIP_VIEWPORT_MODIFY |
- GEN6_SF_VIEWPORT_MODIFY |
- GEN6_CC_VIEWPORT_MODIFY);
- ilo_cp_write(cp, clip_viewport);
- ilo_cp_write(cp, sf_viewport);
- ilo_cp_write(cp, cc_viewport);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t blend_state,
- uint32_t depth_stencil_state,
- uint32_t color_calc_state,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
+ const unsigned instance_divisor = states[i].instance_divisor;
+ unsigned hw_idx;
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, blend_state | 1);
- ilo_cp_write(cp, depth_stencil_state | 1);
- ilo_cp_write(cp, color_calc_state | 1);
- ilo_cp_end(cp);
-}
+ /*
+ * map the pipe vb to the hardware vb, which has a fixed instance
+ * divisor
+ */
+ for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+ if (ve->vb_mapping[hw_idx] == pipe_idx &&
+ ve->instance_divisors[hw_idx] == instance_divisor)
+ break;
+ }
-static void
-gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t scissor_rect,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
- const uint8_t cmd_len = 2;
+ /* create one if there is no matching hardware vb */
+ if (hw_idx >= ve->vb_count) {
+ hw_idx = ve->vb_count++;
- ILO_GPE_VALID_GEN(dev, 6, 7);
+ ve->vb_mapping[hw_idx] = pipe_idx;
+ ve->instance_divisors[hw_idx] = instance_divisor;
+ }
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, scissor_rect);
- ilo_cp_end(cp);
+ ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
+ }
}
void
cso->payload[2] = dw5;
}
-static void
-gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *vs,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
- const uint8_t cmd_len = 6;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- if (!vs) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(vs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_end(cp);
-}
-
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
cso->payload[3] = dw6;
}
-static void
-gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *gs,
- const struct ilo_shader_state *vs,
- int verts_per_prim,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
- const uint8_t cmd_len = 7;
- uint32_t dw1, dw2, dw4, dw5, dw6;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- if (gs) {
- const struct ilo_shader_cso *cso;
-
- dw1 = ilo_shader_get_kernel_offset(gs);
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
- dw6 = cso->payload[3];
- }
- else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
- struct ilo_shader_cso cso;
- enum ilo_kernel_param param;
-
- switch (verts_per_prim) {
- case 1:
- param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
- break;
- case 2:
- param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
- break;
- default:
- param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
- break;
- }
-
- dw1 = ilo_shader_get_kernel_offset(vs) +
- ilo_shader_get_kernel_param(vs, param);
-
- /* cannot use VS's CSO */
- ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
- dw2 = cso.payload[0];
- dw4 = cso.payload[1];
- dw5 = cso.payload[2];
- dw6 = cso.payload[3];
- }
- else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
- dw5 = GEN6_GS_STATISTICS_ENABLE;
- dw6 = 0;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, dw6);
- ilo_cp_end(cp);
-}
-
void
ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
clip->can_enable_guardband = false;
}
-static void
-gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- bool enable_guardband,
- int num_viewports,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
- const uint8_t cmd_len = 4;
- uint32_t dw1, dw2, dw3;
-
- if (rasterizer) {
- int interps;
-
- dw1 = rasterizer->clip.payload[0];
- dw2 = rasterizer->clip.payload[1];
- dw3 = rasterizer->clip.payload[2];
-
- if (enable_guardband && rasterizer->clip.can_enable_guardband)
- dw2 |= GEN6_CLIP_GB_TEST;
-
- interps = (fs) ? ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
-
- if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
- 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
- 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
- dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
-
- dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
- (num_viewports - 1);
- }
- else {
- dw1 = 0;
- dw2 = 0;
- dw3 = 0;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, dw3);
- ilo_cp_end(cp);
-}
-
void
ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
}
}
-/**
- * Fill in DW2 to DW7 of 3DSTATE_SF.
- */
-void
-ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- int num_samples,
- enum pipe_format depth_format,
- uint32_t *payload, unsigned payload_len)
-{
- const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
-
- assert(payload_len == Elements(sf->payload));
-
- if (sf) {
- memcpy(payload, sf->payload, sizeof(sf->payload));
-
- if (num_samples > 1)
- payload[1] |= sf->dw_msaa;
-
- if (dev->gen >= ILO_GEN(7)) {
- int format;
-
- /* separate stencil */
- switch (depth_format) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- depth_format = PIPE_FORMAT_Z24X8_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- depth_format = PIPE_FORMAT_Z32_FLOAT;;
- break;
- case PIPE_FORMAT_S8_UINT:
- depth_format = PIPE_FORMAT_NONE;
- break;
- default:
- break;
- }
-
- format = gen6_translate_depth_format(depth_format);
- /* FLOAT surface is assumed when there is no depth buffer */
- if (format < 0)
- format = BRW_DEPTHFORMAT_D32_FLOAT;
-
- payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
- }
- }
- else {
- payload[0] = 0;
- payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
- payload[2] = 0;
- payload[3] = 0;
- payload[4] = 0;
- payload[5] = 0;
- }
-}
-
-/**
- * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
- */
-void
-ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- const struct ilo_shader_state *last_sh,
- uint32_t *dw, int num_dwords)
-{
- int output_count, vue_offset, vue_len;
- const struct ilo_kernel_routing *routing;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
- assert(num_dwords == 13);
-
- if (!fs) {
- memset(dw, 0, sizeof(dw[0]) * num_dwords);
-
- if (dev->gen >= ILO_GEN(7))
- dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
- else
- dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
-
- return;
- }
-
- output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
- assert(output_count <= 32);
-
- routing = ilo_shader_get_kernel_routing(fs);
-
- vue_offset = routing->source_skip;
- assert(vue_offset % 2 == 0);
- vue_offset /= 2;
-
- vue_len = (routing->source_len + 1) / 2;
- if (!vue_len)
- vue_len = 1;
-
- if (dev->gen >= ILO_GEN(7)) {
- dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
- vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
- vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
- if (routing->swizzle_enable)
- dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
- }
- else {
- dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
- vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
- if (routing->swizzle_enable)
- dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
- }
-
- switch (rasterizer->state.sprite_coord_mode) {
- case PIPE_SPRITE_COORD_UPPER_LEFT:
- dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
- break;
- case PIPE_SPRITE_COORD_LOWER_LEFT:
- dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
- break;
- }
-
- STATIC_ASSERT(Elements(routing->swizzles) >= 16);
- memcpy(&dw[1], routing->swizzles, 2 * 16);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 268:
- *
- * "This field (Point Sprite Texture Coordinate Enable) must be
- * programmed to 0 when non-point primitives are rendered."
- *
- * TODO We do not check that yet.
- */
- dw[9] = routing->point_sprite_enable;
-
- dw[10] = routing->const_interp_enable;
-
- /* WrapShortest enables */
- dw[11] = 0;
- dw[12] = 0;
-}
-
-static void
-gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- const struct ilo_shader_state *last_sh,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
- const uint8_t cmd_len = 20;
- uint32_t payload_raster[6], payload_sbe[13];
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
- 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
- ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
- fs, last_sh, payload_sbe, Elements(payload_sbe));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, payload_sbe[0]);
- ilo_cp_write_multi(cp, payload_raster, 6);
- ilo_cp_write_multi(cp, &payload_sbe[1], 12);
- ilo_cp_end(cp);
-}
-
void
ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
- dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
- 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
- 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
-
- dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 275:
- *
- * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
- * PS kernel or color calculator has the ability to kill (discard)
- * pixels or samples, other than due to depth or stencil testing.
- * This bit is required to be ENABLED in the following situations:
- *
- * The API pixel shader program contains "killpix" or "discard"
- * instructions, or other code in the pixel shader kernel that can
- * cause the final pixel mask to differ from the pixel mask received
- * on dispatch.
- *
- * A sampler with chroma key enabled with kill pixel mode is used by
- * the pixel shader.
- *
- * Any render target has Alpha Test Enable or AlphaToCoverage Enable
- * enabled.
- *
- * The pixel shader kernel generates and outputs oMask.
- *
- * Note: As ClipDistance clipping is fully supported in hardware and
- * therefore not via PS instructions, there should be no need to
- * ENABLE this bit due to ClipDistance clipping."
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw5 |= GEN6_WM_KILL_ENABLE;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 275:
- *
- * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
- * field must be set to disabled."
- *
- * TODO This is not checked yet.
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw5 |= GEN6_WM_COMPUTED_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw5 |= GEN6_WM_USES_SOURCE_W;
-
- /*
- * TODO set this bit only when
- *
- * a) fs writes colors and color is not masked, or
- * b) fs writes depth, or
- * c) fs or cc kills
- */
- if (true)
- dw5 |= GEN6_WM_DISPATCH_ENABLE;
-
- assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
-
- dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
- GEN6_WM_POSOFFSET_NONE |
- interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 4);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
- cso->payload[3] = dw6;
-}
-
-static void
-gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *fs,
- int num_samplers,
- const struct ilo_rasterizer_state *rasterizer,
- bool dual_blend, bool cc_may_kill,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
- const uint8_t cmd_len = 9;
- const int num_samples = 1;
- const struct ilo_shader_cso *fs_cso;
- uint32_t dw2, dw4, dw5, dw6;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- if (!fs) {
- /* see brwCreateContext() */
- const int max_threads = (dev->gt == 2) ? 80 : 40;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- /* honor the valid range even if dispatching is disabled */
- ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
- fs_cso = ilo_shader_get_kernel_cso(fs);
- dw2 = fs_cso->payload[0];
- dw4 = fs_cso->payload[1];
- dw5 = fs_cso->payload[2];
- dw6 = fs_cso->payload[3];
-
- dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
-
- if (true) {
- dw4 |= GEN6_WM_STATISTICS_ENABLE;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) must be disabled if either of these
- * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
- * Resolve Enable or Depth Buffer Resolve Enable."
- */
- dw4 |= GEN6_WM_DEPTH_CLEAR;
- dw4 |= GEN6_WM_DEPTH_RESOLVE;
- dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
- }
-
- if (cc_may_kill) {
- dw5 |= GEN6_WM_KILL_ENABLE |
- GEN6_WM_DISPATCH_ENABLE;
- }
-
- if (dual_blend)
- dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
-
- dw5 |= rasterizer->wm.payload[0];
-
- dw6 |= rasterizer->wm.payload[1];
-
- if (num_samples > 1) {
- dw6 |= rasterizer->wm.dw_msaa_rast |
- rasterizer->wm.dw_msaa_disp;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, dw6);
- ilo_cp_write(cp, 0); /* kernel 1 */
- ilo_cp_write(cp, 0); /* kernel 2 */
- ilo_cp_end(cp);
-}
-
-static unsigned
-gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs, int max_read_length,
- uint32_t *dw, int num_dwords)
-{
- unsigned enabled = 0x0;
- int total_read_length, i;
-
- assert(num_dwords == 4);
-
- total_read_length = 0;
- for (i = 0; i < 4; i++) {
- if (i < num_bufs && sizes[i]) {
- /* in 256-bit units minus one */
- const int read_len = (sizes[i] + 31) / 32 - 1;
-
- assert(bufs[i] % 32 == 0);
- assert(read_len < 32);
-
- enabled |= 1 << i;
- dw[i] = bufs[i] | read_len;
-
- total_read_length += read_len + 1;
- }
- else {
- dw[i] = 0;
- }
- }
-
- assert(total_read_length <= max_read_length);
-
- return enabled;
-}
-
-static void
-gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
+ dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
+ 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
+ 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(num_bufs <= 4);
+ dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+ * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+ *
+ * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
+ * PS kernel or color calculator has the ability to kill (discard)
+ * pixels or samples, other than due to depth or stencil testing.
+ * This bit is required to be ENABLED in the following situations:
+ *
+ * The API pixel shader program contains "killpix" or "discard"
+ * instructions, or other code in the pixel shader kernel that can
+ * cause the final pixel mask to differ from the pixel mask received
+ * on dispatch.
+ *
+ * A sampler with chroma key enabled with kill pixel mode is used by
+ * the pixel shader.
+ *
+ * Any render target has Alpha Test Enable or AlphaToCoverage Enable
+ * enabled.
*
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 32"
+ * The pixel shader kernel generates and outputs oMask.
+ *
+ * Note: As ClipDistance clipping is fully supported in hardware and
+ * therefore not via PS instructions, there should be no need to
+ * ENABLE this bit due to ClipDistance clipping."
*/
- buf_enabled = gen6_fill_3dstate_constant(dev,
- bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
- ilo_cp_write(cp, buf_dw[0]);
- ilo_cp_write(cp, buf_dw[1]);
- ilo_cp_write(cp, buf_dw[2]);
- ilo_cp_write(cp, buf_dw[3]);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(num_bufs <= 4);
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
+ dw5 |= GEN6_WM_KILL_ENABLE;
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 161:
+ * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+ *
+ * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
+ * field must be set to disabled."
*
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 64"
+ * TODO This is not checked yet.
*/
- buf_enabled = gen6_fill_3dstate_constant(dev,
- bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
- ilo_cp_write(cp, buf_dw[0]);
- ilo_cp_write(cp, buf_dw[1]);
- ilo_cp_write(cp, buf_dw[2]);
- ilo_cp_write(cp, buf_dw[3]);
- ilo_cp_end(cp);
-}
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
+ dw5 |= GEN6_WM_COMPUTED_DEPTH;
-static void
-gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+ dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(num_bufs <= 4);
+ if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+ dw5 |= GEN6_WM_USES_SOURCE_W;
/*
- * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+ * TODO set this bit only when
*
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 64"
+ * a) fs writes colors and color is not masked, or
+ * b) fs writes depth, or
+ * c) fs or cc kills
*/
- buf_enabled = gen6_fill_3dstate_constant(dev,
- bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
- ilo_cp_write(cp, buf_dw[0]);
- ilo_cp_write(cp, buf_dw[1]);
- ilo_cp_write(cp, buf_dw[2]);
- ilo_cp_write(cp, buf_dw[3]);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
- unsigned sample_mask,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
- const uint8_t cmd_len = 2;
- const unsigned valid_mask = 0xf;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- sample_mask &= valid_mask;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, sample_mask);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
- unsigned x, unsigned y,
- unsigned width, unsigned height,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
- const uint8_t cmd_len = 4;
- unsigned xmax = x + width - 1;
- unsigned ymax = y + height - 1;
- int rect_limit;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- if (dev->gen >= ILO_GEN(7)) {
- rect_limit = 16383;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 230:
- *
- * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
- * must be an even number"
- */
- assert(y % 2 == 0);
-
- rect_limit = 8191;
- }
-
- if (x > rect_limit) x = rect_limit;
- if (y > rect_limit) y = rect_limit;
- if (xmax > rect_limit) xmax = rect_limit;
- if (ymax > rect_limit) ymax = rect_limit;
+ if (true)
+ dw5 |= GEN6_WM_DISPATCH_ENABLE;
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, y << 16 | x);
- ilo_cp_write(cp, ymax << 16 | xmax);
+ assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+ dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
- /*
- * There is no need to set the origin. It is intended to support front
- * buffer rendering.
- */
- ilo_cp_write(cp, 0);
+ dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
+ GEN6_WM_POSOFFSET_NONE |
+ interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
- ilo_cp_end(cp);
+ STATIC_ASSERT(Elements(cso->payload) >= 4);
+ cso->payload[0] = dw2;
+ cso->payload[1] = dw4;
+ cso->payload[2] = dw5;
+ cso->payload[3] = dw6;
}
struct ilo_zs_surface_info {
zs->payload[4] = dw5;
zs->payload[5] = dw6;
- /* do not increment reference count */
- zs->bo = info.zs.bo;
-
- /* separate stencil */
- if (info.stencil.bo) {
- assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
- info.stencil.stride % 128 == 0);
-
- zs->payload[6] = info.stencil.stride - 1;
- zs->payload[7] = info.stencil.offset;
-
- /* do not increment reference count */
- zs->separate_s8_bo = info.stencil.bo;
- }
- else {
- zs->payload[6] = 0;
- zs->payload[7] = 0;
- zs->separate_s8_bo = NULL;
- }
-
- /* hiz */
- if (info.hiz.bo) {
- zs->payload[8] = info.hiz.stride - 1;
- zs->payload[9] = info.hiz.offset;
-
- /* do not increment reference count */
- zs->hiz_bo = info.hiz.bo;
- }
- else {
- zs->payload[8] = 0;
- zs->payload[9] = 0;
- zs->hiz_bo = NULL;
- }
-}
-
-static void
-gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_zs_surface *zs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
- const uint8_t cmd_len = 7;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, zs->payload[0]);
- ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_write(cp, zs->payload[2]);
- ilo_cp_write(cp, zs->payload[3]);
- ilo_cp_write(cp, zs->payload[4]);
- ilo_cp_write(cp, zs->payload[5]);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
- int x_offset, int y_offset,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
- assert(x_offset >= 0 && x_offset <= 31);
- assert(y_offset >= 0 && y_offset <= 31);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, x_offset << 8 | y_offset);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
- const struct pipe_poly_stipple *pattern,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
- const uint8_t cmd_len = 33;
- int i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
- assert(Elements(pattern->stipple) == 32);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- for (i = 0; i < 32; i++)
- ilo_cp_write(cp, pattern->stipple[i]);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
- unsigned pattern, unsigned factor,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
- const uint8_t cmd_len = 3;
- unsigned inverse;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
- assert((pattern & 0xffff) == pattern);
- assert(factor >= 1 && factor <= 256);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, pattern);
-
- if (dev->gen >= ILO_GEN(7)) {
- /* in U1.16 */
- inverse = (unsigned) (65536.0f / factor);
- ilo_cp_write(cp, inverse << 15 | factor);
- }
- else {
- /* in U1.13 */
- inverse = (unsigned) (8192.0f / factor);
- ilo_cp_write(cp, inverse << 16 | factor);
- }
-
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0 << 16 | 0);
- ilo_cp_write(cp, 0 << 16 | 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
- int index, unsigned svbi,
- unsigned max_svbi,
- bool load_vertex_count,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
- const uint8_t cmd_len = 4;
- uint32_t dw1;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
- assert(index >= 0 && index < 4);
-
- dw1 = index << SVB_INDEX_SHIFT;
- if (load_vertex_count)
- dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, svbi);
- ilo_cp_write(cp, max_svbi);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
- int num_samples,
- const uint32_t *packed_sample_pos,
- bool pixel_location_center,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
- const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
- uint32_t dw1, dw2, dw3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- dw1 = (pixel_location_center) ?
- MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
-
- switch (num_samples) {
- case 0:
- case 1:
- dw1 |= MS_NUMSAMPLES_1;
- dw2 = 0;
- dw3 = 0;
- break;
- case 4:
- dw1 |= MS_NUMSAMPLES_4;
- dw2 = packed_sample_pos[0];
- dw3 = 0;
- break;
- case 8:
- assert(dev->gen >= ILO_GEN(7));
- dw1 |= MS_NUMSAMPLES_8;
- dw2 = packed_sample_pos[0];
- dw3 = packed_sample_pos[1];
- break;
- default:
- assert(!"unsupported sample count");
- dw1 |= MS_NUMSAMPLES_1;
- dw2 = 0;
- dw3 = 0;
- break;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- if (dev->gen >= ILO_GEN(7))
- ilo_cp_write(cp, dw3);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_zs_surface *zs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x06) :
- ILO_GPE_CMD(0x3, 0x1, 0x0e);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- /* see ilo_gpe_init_zs_surface() */
- ilo_cp_write(cp, zs->payload[6]);
- ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
- const struct ilo_zs_surface *zs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
- ILO_GPE_CMD(0x3, 0x0, 0x07) :
- ILO_GPE_CMD(0x3, 0x1, 0x0f);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- /* see ilo_gpe_init_zs_surface() */
- ilo_cp_write(cp, zs->payload[8]);
- ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
- uint32_t clear_val,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- GEN5_DEPTH_CLEAR_VALID);
- ilo_cp_write(cp, clear_val);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
- uint32_t dw1,
- struct intel_bo *bo, uint32_t bo_offset,
- bool write_qword,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
- const uint8_t cmd_len = (write_qword) ? 5 : 4;
- const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
- const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- if (dw1 & PIPE_CONTROL_CS_STALL) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 73:
- *
- * "1 of the following must also be set (when CS stall is set):
- *
- * * Depth Cache Flush Enable ([0] of DW1)
- * * Stall at Pixel Scoreboard ([1] of DW1)
- * * Depth Stall ([13] of DW1)
- * * Post-Sync Operation ([13] of DW1)
- * * Render Target Cache Flush Enable ([12] of DW1)
- * * Notify Enable ([8] of DW1)"
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 61:
- *
- * "One of the following must also be set (when CS stall is set):
- *
- * * Render Target Cache Flush Enable ([12] of DW1)
- * * Depth Cache Flush Enable ([0] of DW1)
- * * Stall at Pixel Scoreboard ([1] of DW1)
- * * Depth Stall ([13] of DW1)
- * * Post-Sync Operation ([13] of DW1)"
- */
- uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_DEPTH_STALL;
-
- /* post-sync op */
- bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP;
-
- if (dev->gen == ILO_GEN(6))
- bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
-
- assert(dw1 & bit_test);
- }
-
- if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 73:
- *
- * "Following bits must be clear (when Depth Stall is set):
- *
- * * Render Target Cache Flush Enable ([12] of DW1)
- * * Depth Cache Flush Enable ([0] of DW1)"
- */
- assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
- ilo_cp_write(cp, 0);
- if (write_qword)
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib,
- bool rectlist,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
- const uint8_t cmd_len = 6;
- const int prim = (rectlist) ?
- _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
- GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2) |
- prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
- vb_access);
- ilo_cp_write(cp, info->count);
- ilo_cp_write(cp, vb_start);
- ilo_cp_write(cp, info->instance_count);
- ilo_cp_write(cp, info->start_instance);
- ilo_cp_write(cp, info->index_bias);
- ilo_cp_end(cp);
-}
-
-static uint32_t
-gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
- const struct ilo_shader_state **cs,
- uint32_t *sampler_state,
- int *num_samplers,
- uint32_t *binding_table_state,
- int *num_surfaces,
- int num_ids,
- struct ilo_cp *cp)
-{
- /*
- * From the Sandy Bridge PRM, volume 2 part 2, page 34:
- *
- * "(Interface Descriptor Total Length) This field must have the same
- * alignment as the Interface Descriptor Data Start Address.
- *
- * It must be DQWord (32-byte) aligned..."
- *
- * From the Sandy Bridge PRM, volume 2 part 2, page 35:
- *
- * "(Interface Descriptor Data Start Address) Specifies the 32-byte
- * aligned address of the Interface Descriptor data."
- */
- const int state_align = 32 / 4;
- const int state_len = (32 / 4) * num_ids;
- uint32_t state_offset, *dw;
- int i;
+ /* do not increment reference count */
+ zs->bo = info.zs.bo;
- ILO_GPE_VALID_GEN(dev, 6, 6);
+ /* separate stencil */
+ if (info.stencil.bo) {
+ assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
+ info.stencil.stride % 128 == 0);
+
+ zs->payload[6] = info.stencil.stride - 1;
+ zs->payload[7] = info.stencil.offset;
- dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_ids; i++) {
- dw[0] = ilo_shader_get_kernel_offset(cs[i]);
- dw[1] = 1 << 18; /* SPF */
- dw[2] = sampler_state[i] |
- (num_samplers[i] + 3) / 4 << 2;
- dw[3] = binding_table_state[i] |
- num_surfaces[i];
- dw[4] = 0 << 16 | /* CURBE Read Length */
- 0; /* CURBE Read Offset */
- dw[5] = 0; /* Barrier ID */
- dw[6] = 0;
- dw[7] = 0;
-
- dw += 8;
+ /* do not increment reference count */
+ zs->separate_s8_bo = info.stencil.bo;
+ }
+ else {
+ zs->payload[6] = 0;
+ zs->payload[7] = 0;
+ zs->separate_s8_bo = NULL;
}
- return state_offset;
+ /* hiz */
+ if (info.hiz.bo) {
+ zs->payload[8] = info.hiz.stride - 1;
+ zs->payload[9] = info.hiz.offset;
+
+ /* do not increment reference count */
+ zs->hiz_bo = info.hiz.bo;
+ }
+ else {
+ zs->payload[8] = 0;
+ zs->payload[9] = 0;
+ zs->hiz_bo = NULL;
+ }
}
static void
vp->max_z = scale_z * 1.0f + state->translate[2];
}
-static uint32_t
-gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 8 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 262:
- *
- * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
-
- dw += 8;
- }
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 4 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 193:
- *
- * "The viewport-related state is stored as an array of up to 16
- * elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_gbx);
- dw[1] = fui(vp->max_gbx);
- dw[2] = fui(vp->min_gby);
- dw[3] = fui(vp->max_gby);
-
- dw += 4;
- }
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 2 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 385:
- *
- * "The viewport state is stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_z);
- dw[1] = fui(vp->max_z);
-
- dw += 2;
- }
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
- const struct pipe_stencil_ref *stencil_ref,
- float alpha_ref,
- const struct pipe_blend_color *blend_color,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- const int state_len = 6;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
- state_len, state_align, &state_offset);
-
- dw[0] = stencil_ref->ref_value[0] << 24 |
- stencil_ref->ref_value[1] << 16 |
- BRW_ALPHATEST_FORMAT_UNORM8;
- dw[1] = float_to_ubyte(alpha_ref);
- dw[2] = fui(blend_color->color[0]);
- dw[3] = fui(blend_color->color[1]);
- dw[4] = fui(blend_color->color[2]);
- dw[5] = fui(blend_color->color[3]);
-
- return state_offset;
-}
-
static int
gen6_blend_factor_dst_alpha_forced_one(int factor)
{
}
}
-static uint32_t
-gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
- const struct ilo_blend_state *blend,
- const struct ilo_fb_state *fb,
- const struct pipe_alpha_state *alpha,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- int state_len;
- uint32_t state_offset, *dw;
- unsigned num_targets, i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 376:
- *
- * "The blend state is stored as an array of up to 8 elements..."
- */
- num_targets = fb->state.nr_cbufs;
- assert(num_targets <= 8);
-
- if (!num_targets) {
- if (!alpha->enabled)
- return 0;
- /* to be able to reference alpha func */
- num_targets = 1;
- }
-
- state_len = 2 * num_targets;
-
- dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_targets; i++) {
- const unsigned idx = (blend->independent_blend_enable) ? i : 0;
- const struct ilo_blend_cso *cso = &blend->cso[idx];
- const int num_samples = fb->num_samples;
- const struct util_format_description *format_desc =
- (idx < fb->state.nr_cbufs) ?
- util_format_description(fb->state.cbufs[idx]->format) : NULL;
- bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
-
- rt_is_unorm = true;
- rt_is_pure_integer = false;
- rt_dst_alpha_forced_one = false;
-
- if (format_desc) {
- int ch;
-
- switch (format_desc->format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- /* force alpha to one when the HW format has alpha */
- assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
- == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
- rt_dst_alpha_forced_one = true;
- break;
- default:
- break;
- }
-
- for (ch = 0; ch < 4; ch++) {
- if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
- continue;
-
- if (format_desc->channel[ch].pure_integer) {
- rt_is_unorm = false;
- rt_is_pure_integer = true;
- break;
- }
-
- if (!format_desc->channel[ch].normalized ||
- format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
- rt_is_unorm = false;
- }
- }
-
- dw[0] = cso->payload[0];
- dw[1] = cso->payload[1];
-
- if (!rt_is_pure_integer) {
- if (rt_dst_alpha_forced_one)
- dw[0] |= cso->dw_blend_dst_alpha_forced_one;
- else
- dw[0] |= cso->dw_blend;
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 365:
- *
- * "Logic Ops are only supported on *_UNORM surfaces (excluding
- * _SRGB variants), otherwise Logic Ops must be DISABLED."
- *
- * Since logicop is ignored for non-UNORM color buffers, no special care
- * is needed.
- */
- if (rt_is_unorm)
- dw[1] |= cso->dw_logicop;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 356:
- *
- * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
- * Dither both must be disabled."
- *
- * There is no such limitation on GEN7, or for AlphaToOne. But GL
- * requires that anyway.
- */
- if (num_samples > 1)
- dw[1] |= cso->dw_alpha_mod;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 382:
- *
- * "Alpha Test can only be enabled if Pixel Shader outputs a float
- * alpha value."
- */
- if (alpha->enabled && !rt_is_pure_integer) {
- dw[1] |= 1 << 16 |
- gen6_translate_dsa_func(alpha->func) << 13;
- }
-
- dw += 2;
- }
-
- return state_offset;
-}
-
void
ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
const struct pipe_depth_stencil_alpha_state *state,
dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
}
-static uint32_t
-gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
- const struct ilo_dsa_state *dsa,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- const int state_len = 3;
- uint32_t state_offset, *dw;
-
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
- state_len, state_align, &state_offset);
-
- dw[0] = dsa->payload[0];
- dw[1] = dsa->payload[1];
- dw[2] = dsa->payload[2];
-
- return state_offset;
-}
-
void
ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
unsigned start_slot,
}
}
-static uint32_t
-gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
- const struct ilo_scissor_state *scissor,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 2 * num_viewports;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 263:
- *
- * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
- state_len, state_align, &state_offset);
-
- memcpy(dw, scissor->payload, state_len * 4);
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
- uint32_t *surface_states,
- int num_surface_states,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = num_surface_states;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 69:
- *
- * "It is stored as an array of up to 256 elements..."
- */
- assert(num_surface_states <= 256);
-
- if (!num_surface_states)
- return 0;
-
- dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
- state_len, state_align, &state_offset);
- memcpy(dw, surface_states,
- num_surface_states * sizeof(surface_states[0]));
-
- return state_offset;
-}
-
void
ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
unsigned width, unsigned height,
surf->bo = tex->bo;
}
-static uint32_t
-gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
- const struct ilo_view_surface *surf,
- bool for_render,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
- uint32_t state_offset;
- uint32_t read_domains, write_domain;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- if (for_render) {
- read_domains = INTEL_DOMAIN_RENDER;
- write_domain = INTEL_DOMAIN_RENDER;
- }
- else {
- read_domains = INTEL_DOMAIN_SAMPLER;
- write_domain = 0;
- }
-
- ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
-
- STATIC_ASSERT(Elements(surf->payload) >= 8);
-
- ilo_cp_write(cp, surf->payload[0]);
- ilo_cp_write_bo(cp, surf->payload[1],
- surf->bo, read_domains, write_domain);
- ilo_cp_write(cp, surf->payload[2]);
- ilo_cp_write(cp, surf->payload[3]);
- ilo_cp_write(cp, surf->payload[4]);
- ilo_cp_write(cp, surf->payload[5]);
-
- if (dev->gen >= ILO_GEN(7)) {
- ilo_cp_write(cp, surf->payload[6]);
- ilo_cp_write(cp, surf->payload[7]);
- }
-
- ilo_cp_end(cp);
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
- const struct pipe_stream_output_target *so,
- const struct pipe_stream_output_info *so_info,
- int so_index,
- struct ilo_cp *cp)
-{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
- unsigned bo_offset, struct_size;
- enum pipe_format elem_format;
- struct ilo_view_surface surf;
-
- ILO_GPE_VALID_GEN(dev, 6, 6);
-
- bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
-
- switch (so_info->output[so_index].num_components) {
- case 1:
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- case 2:
- elem_format = PIPE_FORMAT_R32G32_FLOAT;
- break;
- case 3:
- elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
- break;
- case 4:
- elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- break;
- default:
- assert(!"unexpected SO components length");
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- }
-
- ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
- struct_size, elem_format, false, true, &surf);
-
- return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
-}
-
static void
sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
const union pipe_color_union *color,
}
}
-static uint32_t
-gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
- const struct ilo_sampler_cso * const *samplers,
- const struct pipe_sampler_view * const *views,
- const uint32_t *sampler_border_colors,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = 4 * num_samplers;
- uint32_t state_offset, *dw;
- int i;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 101:
- *
- * "The sampler state is stored as an array of up to 16 elements..."
- */
- assert(num_samplers <= 16);
-
- if (!num_samplers)
- return 0;
-
- dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_samplers; i++) {
- const struct ilo_sampler_cso *sampler = samplers[i];
- const struct pipe_sampler_view *view = views[i];
- const uint32_t border_color = sampler_border_colors[i];
- uint32_t dw_filter, dw_wrap;
-
- /* there may be holes */
- if (!sampler || !view) {
- /* disabled sampler */
- dw[0] = 1 << 31;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw += 4;
-
- continue;
- }
-
- /* determine filter and wrap modes */
- switch (view->texture->target) {
- case PIPE_TEXTURE_1D:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_1d;
- break;
- case PIPE_TEXTURE_3D:
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 103:
- *
- * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
- * surfaces of type SURFTYPE_3D."
- */
- dw_filter = sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- case PIPE_TEXTURE_CUBE:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_cube;
- break;
- default:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- }
-
- dw[0] = sampler->payload[0];
- dw[1] = sampler->payload[1];
- assert(!(border_color & 0x1f));
- dw[2] = border_color;
- dw[3] = sampler->payload[2];
-
- dw[0] |= dw_filter;
-
- if (dev->gen >= ILO_GEN(7)) {
- dw[3] |= dw_wrap;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 21:
- *
- * "[DevSNB] Errata: Incorrect behavior is observed in cases
- * where the min and mag mode filters are different and
- * SurfMinLOD is nonzero. The determination of MagMode uses the
- * following equation instead of the one in the above
- * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
- *
- * As a way to work around that, we set Base to
- * view->u.tex.first_level.
- */
- dw[0] |= view->u.tex.first_level << 22;
-
- dw[1] |= dw_wrap;
- }
-
- dw += 4;
- }
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
- const struct ilo_sampler_cso *sampler,
- struct ilo_cp *cp)
-{
- const int state_align = 32 / 4;
- const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
- uint32_t state_offset, *dw;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
- state_len, state_align, &state_offset);
-
- /* see ilo_gpe_init_sampler_cso() */
- memcpy(dw, &sampler->payload[3], state_len * 4);
-
- return state_offset;
-}
-
-static uint32_t
-gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
- int size, void **pcb,
- struct ilo_cp *cp)
-{
- /*
- * For all VS, GS, FS, and CS push constant buffers, they must be aligned
- * to 32 bytes, and their sizes are specified in 256-bit units.
- */
- const int state_align = 32 / 4;
- const int state_len = align(size, 32) / 4;
- uint32_t state_offset;
- char *buf;
-
- ILO_GPE_VALID_GEN(dev, 6, 7);
-
- buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
- state_len, state_align, &state_offset);
-
- /* zero out the unused range */
- if (size < state_len * 4)
- memset(&buf[size], 0, state_len * 4 - size);
-
- if (pcb)
- *pcb = buf;
-
- return state_offset;
-}
-
static int
gen6_estimate_command_size(const struct ilo_dev_info *dev,
enum ilo_gpe_gen6_command cmd,
#ifndef ILO_GPE_GEN6_H
#define ILO_GPE_GEN6_H
+#include "util/u_dual_blend.h"
+#include "util/u_half.h"
+#include "brw_defines.h"
+#include "intel_reg.h"
+
#include "ilo_common.h"
+#include "ilo_cp.h"
+#include "ilo_format.h"
+#include "ilo_resource.h"
+#include "ilo_shader.h"
#include "ilo_gpe.h"
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
/* Below are helpers for other GENs */
-int
-ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling);
+/**
+ * Map a depth/stencil pipe format onto the hardware depth buffer format.
+ * Formats that have no entry in the switch below yield -1.
+ */
+static inline int
+gen6_translate_depth_format(enum pipe_format format)
+{
+   int hw_format;
+
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      hw_format = BRW_DEPTHFORMAT_D16_UNORM;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      hw_format = BRW_DEPTHFORMAT_D32_FLOAT;
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      hw_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+      break;
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      hw_format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      hw_format = BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
+      break;
+   default:
+      /* not a depth format the hardware table knows about */
+      hw_format = -1;
+      break;
+   }
+
+   return hw_format;
+}
+
+
+/**
+ * Convert a winsys tiling mode into the hardware tiling bits.  Linear
+ * (INTEL_TILING_NONE) and unrecognized modes both produce 0; the latter also
+ * trips an assertion.
+ */
+static inline int
+ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
+{
+   int hw_tiling = 0;
+
+   switch (tiling) {
+   case INTEL_TILING_NONE:
+      break;
+   case INTEL_TILING_X:
+      hw_tiling = BRW_SURFACE_TILED;
+      break;
+   case INTEL_TILING_Y:
+      hw_tiling = BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
+      break;
+   default:
+      assert(!"unknown tiling");
+      break;
+   }
+
+   return hw_tiling;
+}
-int
-ilo_gpe_gen6_translate_pipe_prim(unsigned prim);
+/**
+ * Translate a pipe primitive type to the matching hardware primitive type.
+ *
+ * The lookup table is indexed directly by \p prim without a range check, so
+ * \p prim must be below PIPE_PRIM_MAX.  The assertion only rejects entries
+ * whose mapped value is zero (i.e. primitives with no initializer below).
+ */
+static inline int
+ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
+{
+ static const int prim_mapping[PIPE_PRIM_MAX] = {
+ [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
+ [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
+ [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
+ [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
+ [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+ [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
+ [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+ [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
+ [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+ };
+
+ /* entries without an initializer are zero and treated as unsupported */
+ assert(prim_mapping[prim]);
+
+ return prim_mapping[prim];
+}
-int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target);
+/**
+ * Pick the hardware surface type for a pipe texture target.  Array targets
+ * share the surface type of their non-array counterpart.  Unknown targets
+ * assert and fall back to BRW_SURFACE_BUFFER.
+ */
+static inline int
+ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
+{
+   int surface_type;
+
+   switch (target) {
+   case PIPE_BUFFER:
+      surface_type = BRW_SURFACE_BUFFER;
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      surface_type = BRW_SURFACE_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+      surface_type = BRW_SURFACE_2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      surface_type = BRW_SURFACE_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      surface_type = BRW_SURFACE_CUBE;
+      break;
+   default:
+      assert(!"unknown texture target");
+      surface_type = BRW_SURFACE_BUFFER;
+      break;
+   }
+
+   return surface_type;
+}
-void
+/**
+ * Fill in DW2 to DW7 of 3DSTATE_SF.
+ *
+ * \param rasterizer   rasterizer state to copy the precomputed SF payload
+ *                     from; may be NULL, in which case a default payload
+ *                     (all zero apart from the multisample bit) is written
+ * \param num_samples  sample count of the render target; values above 1
+ *                     turn on the multisample rasterization bits
+ * \param depth_format format of the bound depth buffer; only consulted on
+ *                     GEN7+ when \p rasterizer is non-NULL
+ * \param payload      destination for the six DWords
+ * \param payload_len  must equal the number of DWords in the SF payload
+ */
+static inline void
ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
int num_samples,
enum pipe_format depth_format,
- uint32_t *payload, unsigned payload_len);
+                                    uint32_t *payload, unsigned payload_len)
+{
+   /* Elements() is sizeof-based, so this does not dereference rasterizer */
+   assert(payload_len == Elements(rasterizer->sf.payload));
+
+   /*
+    * Test the rasterizer pointer itself.  Testing &rasterizer->sf is not a
+    * valid NULL check: the address of a member is only NULL when the member
+    * happens to sit at offset 0, and forming it from a NULL pointer is
+    * undefined to begin with.
+    */
+   if (rasterizer) {
+      const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+      memcpy(payload, sf->payload, sizeof(sf->payload));
+
+      if (num_samples > 1)
+         payload[1] |= sf->dw_msaa;
+
+      if (dev->gen >= ILO_GEN(7)) {
+         int format;
+
+         /* separate stencil: only the depth part lives in the depth buffer */
+         switch (depth_format) {
+         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+            depth_format = PIPE_FORMAT_Z24X8_UNORM;
+            break;
+         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+            depth_format = PIPE_FORMAT_Z32_FLOAT;
+            break;
+         case PIPE_FORMAT_S8_UINT:
+            depth_format = PIPE_FORMAT_NONE;
+            break;
+         default:
+            break;
+         }
+
+         format = gen6_translate_depth_format(depth_format);
+         /* FLOAT surface is assumed when there is no depth buffer */
+         if (format < 0)
+            format = BRW_DEPTHFORMAT_D32_FLOAT;
+
+         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
+      }
+   }
+   else {
+      /* no rasterizer state: everything off except multisample pattern */
+      payload[0] = 0;
+      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
+      payload[2] = 0;
+      payload[3] = 0;
+      payload[4] = 0;
+      payload[5] = 0;
+   }
+}
-void
+/**
+ * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
+ *
+ * \p num_dwords must be 13.  dw[0] receives the output count and URB read
+ * offset/length, dw[1]..dw[8] the attribute swizzles, dw[9] and dw[10] the
+ * point sprite and constant interpolation enables, and dw[11]/dw[12] are
+ * zeroed.  When \p fs is NULL everything is zero except for a URB entry
+ * read length of 1 in dw[0].
+ *
+ * NOTE(review): \p last_sh is not referenced anywhere in this function.
+ */
+static inline void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
const struct ilo_shader_state *fs,
const struct ilo_shader_state *last_sh,
- uint32_t *dw, int num_dwords);
+ uint32_t *dw, int num_dwords)
+{
+ int output_count, vue_offset, vue_len;
+ const struct ilo_kernel_routing *routing;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+ assert(num_dwords == 13);
+
+ /* no fragment shader: nothing to route, but read length must be non-zero */
+ if (!fs) {
+ memset(dw, 0, sizeof(dw[0]) * num_dwords);
+
+ if (dev->gen >= ILO_GEN(7))
+ dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
+ else
+ dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
+
+ return;
+ }
+
+ /* the FS input count is what the SF/SBE stage must output */
+ output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+ assert(output_count <= 32);
+
+ routing = ilo_shader_get_kernel_routing(fs);
+
+ /* source_skip must be even; the offset is programmed in units of two */
+ vue_offset = routing->source_skip;
+ assert(vue_offset % 2 == 0);
+ vue_offset /= 2;
+
+ /* round source_len up to a multiple of two, and never program zero */
+ vue_len = (routing->source_len + 1) / 2;
+ if (!vue_len)
+ vue_len = 1;
+
+ if (dev->gen >= ILO_GEN(7)) {
+ dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
+ vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+ vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+ if (routing->swizzle_enable)
+ dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
+ }
+ else {
+ dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
+ vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+ vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ if (routing->swizzle_enable)
+ dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
+ }
+
+ /* other sprite_coord_mode values leave dw[0] untouched */
+ switch (rasterizer->state.sprite_coord_mode) {
+ case PIPE_SPRITE_COORD_UPPER_LEFT:
+ dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
+ break;
+ case PIPE_SPRITE_COORD_LOWER_LEFT:
+ dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+ break;
+ }
+
+ /* copies 32 bytes of swizzles, filling dw[1] through dw[8] */
+ STATIC_ASSERT(Elements(routing->swizzles) >= 16);
+ memcpy(&dw[1], routing->swizzles, 2 * 16);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+ *
+ * "This field (Point Sprite Texture Coordinate Enable) must be
+ * programmed to 0 when non-point primitives are rendered."
+ *
+ * TODO We do not check that yet.
+ */
+ dw[9] = routing->point_sprite_enable;
+
+ dw[10] = routing->const_interp_enable;
+
+ /* WrapShortest enables */
+ dw[11] = 0;
+ dw[12] = 0;
+}
+
+/**
+ * Emit the 10-DWord STATE_BASE_ADDRESS command: the command header, five
+ * base addresses (general, surface, dynamic, indirect object, instruction)
+ * and four upper bounds (general, dynamic, indirect object, instruction).
+ *
+ * All four sizes must be multiples of 4KB.  A non-zero size is emitted as a
+ * relocation against the matching bo; a zero size emits a constant instead
+ * so that the range check is skipped.
+ *
+ * NOTE(review): the low bit OR'ed into every address/bound is presumably the
+ * hardware "modify enable" bit -- confirm against the PRM.
+ */
+static inline void
+gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
+ struct intel_bo *general_state_bo,
+ struct intel_bo *surface_state_bo,
+ struct intel_bo *dynamic_state_bo,
+ struct intel_bo *indirect_object_bo,
+ struct intel_bo *instruction_bo,
+ uint32_t general_state_size,
+ uint32_t dynamic_state_size,
+ uint32_t indirect_object_size,
+ uint32_t instruction_size,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
+ const uint8_t cmd_len = 10;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /* 4K-page aligned */
+ assert(((general_state_size | dynamic_state_size |
+ indirect_object_size | instruction_size) & 0xfff) == 0);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+ /* base addresses, each a relocation at offset 0 of its bo */
+ ilo_cp_write_bo(cp, 1, general_state_bo,
+ INTEL_DOMAIN_RENDER,
+ 0);
+ ilo_cp_write_bo(cp, 1, surface_state_bo,
+ INTEL_DOMAIN_SAMPLER,
+ 0);
+ ilo_cp_write_bo(cp, 1, dynamic_state_bo,
+ INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
+ 0);
+ ilo_cp_write_bo(cp, 1, indirect_object_bo,
+ 0,
+ 0);
+ ilo_cp_write_bo(cp, 1, instruction_bo,
+ INTEL_DOMAIN_INSTRUCTION,
+ 0);
+
+ /* upper bounds follow, in the same order minus the surface state */
+ if (general_state_size) {
+ ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
+ INTEL_DOMAIN_RENDER,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 1);
+ }
+
+ if (dynamic_state_size) {
+ ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
+ INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 0xfffff000 + 1);
+ }
+
+ if (indirect_object_size) {
+ ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
+ 0,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 0xfffff000 + 1);
+ }
+
+ if (instruction_size) {
+ ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
+ INTEL_DOMAIN_INSTRUCTION,
+ 0);
+ }
+ else {
+ /* skip range check */
+ ilo_cp_write(cp, 1);
+ }
+
+ ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 2-DWord STATE_SIP command.
+ *
+ * \param sip  value written verbatim as the second DWord of the command
+ * \param cp   command parser to emit into
+ */
+static inline void
+gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
+                    uint32_t sip,
+                    struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
+   const uint8_t cmd_len = 2;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * Reserve exactly cmd_len DWords, and put the length field
+    * (cmd_len - 2) in the header DWord, as every other emitter in this
+    * file does.
+    */
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, sip);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the single-DWord 3DSTATE_VF_STATISTICS command.  \p enable is OR'ed
+ * into the low bit of the command DWord.
+ */
+static inline void
+gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
+                                bool enable,
+                                struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 1;
+   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw0 = cmd | enable;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the single-DWord PIPELINE_SELECT command.  \p pipeline must be 0x0
+ * or 0x1 (3D or media) and is OR'ed into the command DWord.
+ */
+static inline void
+gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
+                          int pipeline,
+                          struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 1;
+   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
+   int dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /* 3D or media */
+   assert(pipeline == 0x0 || pipeline == 0x1);
+
+   dw0 = cmd | pipeline;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 8-DWord MEDIA_VFE_STATE command (GEN6 only).
+ *
+ * DW2 carries (max_threads - 1) and the URB entry count together with the
+ * gateway bits; DW4 carries the URB entry allocation size and a fixed CURBE
+ * allocation size of 480.  Scratch and scoreboard DWords are written as 0.
+ */
+static inline void
+gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
+                          int max_threads, int num_urb_entries,
+                          int urb_entry_size,
+                          struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 8;
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
+   uint32_t thread_dw, alloc_dw;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   thread_dw = (max_threads - 1) << 16 |
+               num_urb_entries << 8 |
+               1 << 7 | /* Reset Gateway Timer */
+               1 << 6;  /* Bypass Gateway Control */
+
+   alloc_dw = urb_entry_size << 16 | /* URB Entry Allocation Size */
+              480;                   /* CURBE Allocation Size */
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, thread_dw);
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, alloc_dw);
+   /* three scoreboard DWords, all zero */
+   for (i = 0; i < 3; i++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 4-DWord MEDIA_CURBE_LOAD command (GEN6 only).  \p buf must be a
+ * multiple of 32; \p size is rounded up to a multiple of 32 before it is
+ * written.
+ */
+static inline void
+gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
+                           uint32_t buf, int size,
+                           struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
+   int padded_size;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   assert(buf % 32 == 0);
+   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
+   padded_size = align(size, 32);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, padded_size);
+   ilo_cp_write(cp, buf);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 4-DWord MEDIA_INTERFACE_DESCRIPTOR_LOAD command (GEN6 only).
+ * \p offset must be a multiple of 32; the length written is \p num_ids
+ * descriptors of 8 DWords (32 bytes) each.
+ */
+static inline void
+gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
+                                          uint32_t offset, int num_ids,
+                                          struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
+   int total_bytes;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   assert(offset % 32 == 0);
+
+   /* every ID has 8 DWords */
+   total_bytes = num_ids * 8 * 4;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, total_bytes);
+   ilo_cp_write(cp, offset);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 2-DWord MEDIA_GATEWAY_STATE command (GEN6 only).  The payload
+ * DWord packs \p id at bit 16, \p byte at bit 8 and \p thread_count at
+ * bit 0.
+ */
+static inline void
+gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
+                              int id, int byte, int thread_count,
+                              struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 2;
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
+   uint32_t gateway_dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   gateway_dw = id << 16 |
+                byte << 8 |
+                thread_count;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, gateway_dw);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 2-DWord MEDIA_STATE_FLUSH command (GEN6 only).  The payload
+ * DWord packs \p thread_count_water_mark at bit 16 and \p barrier_mask at
+ * bit 0.
+ */
+static inline void
+gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
+                            int thread_count_water_mark,
+                            int barrier_mask,
+                            struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 2;
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
+   uint32_t flush_dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   flush_dw = thread_count_water_mark << 16 |
+              barrier_mask;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, flush_dw);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is emitted; the function
+ * only asserts, so neither \p dev nor \p cp is used.
+ */
+static inline void
+gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp)
+{
+ assert(!"MEDIA_OBJECT_WALKER unsupported");
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_BINDING_TABLE_POINTERS command (GEN6 only).  The
+ * VS, GS and PS modify bits are always set, so all three pointers are
+ * updated by every emission.
+ */
+static inline void
+gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t vs_binding_table,
+                                         uint32_t gs_binding_table,
+                                         uint32_t ps_binding_table,
+                                         struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = cmd | (cmd_len - 2) |
+         GEN6_BINDING_TABLE_MODIFY_VS |
+         GEN6_BINDING_TABLE_MODIFY_GS |
+         GEN6_BINDING_TABLE_MODIFY_PS;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, vs_binding_table);
+   ilo_cp_write(cp, gs_binding_table);
+   ilo_cp_write(cp, ps_binding_table);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 4-DWord 3DSTATE_SAMPLER_STATE_POINTERS command (GEN6 only).  The
+ * VS, GS and PS change bits are always set, so all three pointers are
+ * updated by every emission.
+ */
+static inline void
+gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t vs_sampler_state,
+                                         uint32_t gs_sampler_state,
+                                         uint32_t ps_sampler_state,
+                                         struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = cmd | (cmd_len - 2) |
+         VS_SAMPLER_STATE_CHANGE |
+         GS_SAMPLER_STATE_CHANGE |
+         PS_SAMPLER_STATE_CHANGE;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, vs_sampler_state);
+   ilo_cp_write(cp, gs_sampler_state);
+   ilo_cp_write(cp, ps_sampler_state);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 3-DWord 3DSTATE_URB command (GEN6 only), partitioning the URB
+ * between the VS and the GS.
+ *
+ * Entry sizes are rounded up to whole 128-unit rows (a zero size still gets
+ * one row), and the entry counts are derived from the total sizes, rounded
+ * down to a multiple of 4 and clamped to 256.  The VS must end up with at
+ * least 24 entries.
+ *
+ * NOTE(review): the sizes appear to be in bytes, given that row_size is 128
+ * for a 1024-bit row -- confirm against the callers.
+ */
+static inline void
+gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
+ int vs_total_size, int gs_total_size,
+ int vs_entry_size, int gs_entry_size,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
+ const uint8_t cmd_len = 3;
+ const int row_size = 128; /* 1024 bits */
+ int vs_alloc_size, gs_alloc_size;
+ int vs_num_entries, gs_num_entries;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ /* in 1024-bit URB rows */
+ vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
+ gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
+
+ /* the valid range is [1, 5] */
+ if (!vs_alloc_size)
+ vs_alloc_size = 1;
+ if (!gs_alloc_size)
+ gs_alloc_size = 1;
+ assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
+
+ /* the valid range is [24, 256] in multiples of 4 */
+ vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
+ if (vs_num_entries > 256)
+ vs_num_entries = 256;
+ assert(vs_num_entries >= 24);
+
+ /* the valid range is [0, 256] in multiples of 4 */
+ gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
+ if (gs_num_entries > 256)
+ gs_num_entries = 256;
+
+ /* allocation sizes are programmed minus one */
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
+ vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+ ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
+ (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
+ ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit 3DSTATE_VERTEX_BUFFERS with one 4-DWord entry per hardware vertex
+ * buffer described by \p ve.  Nothing is emitted when ve->vb_count is zero.
+ *
+ * Each hardware slot is mapped back to a pipe vertex buffer in \p vb through
+ * ve->vb_mapping.  A slot whose pipe buffer is missing, or whose stride
+ * exceeds 2048, is emitted with zeroed addresses instead of relocations.
+ */
+static inline void
+gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
+ const struct ilo_ve_state *ve,
+ const struct ilo_vb_state *vb,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
+ uint8_t cmd_len;
+ unsigned hw_idx;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 82:
+ *
+ * "From 1 to 33 VBs can be specified..."
+ */
+ assert(ve->vb_count <= 33);
+
+ if (!ve->vb_count)
+ return;
+
+ /* one header DWord plus four DWords per buffer */
+ cmd_len = 1 + 4 * ve->vb_count;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+ for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+ const unsigned instance_divisor = ve->instance_divisors[hw_idx];
+ const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+ const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
+ uint32_t dw;
+
+ dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
+
+ /* a non-zero divisor selects per-instance stepping */
+ if (instance_divisor)
+ dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
+ else
+ dw |= GEN6_VB0_ACCESS_VERTEXDATA;
+
+ if (dev->gen >= ILO_GEN(7))
+ dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+ /* use null vb if there is no buffer or the stride is out of range */
+ if (cso->buffer && cso->stride <= 2048) {
+ const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
+ const uint32_t start_offset = cso->buffer_offset;
+ /*
+ * As noted in ilo_translate_format(), we treat some 3-component
+ * formats as 4-component formats to work around hardware
+ * limitations. Imagine the case where the vertex buffer holds a
+ * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
+ * The hardware would not be able to fetch it because the vertex
+ * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
+ * and that takes at least 8 bytes.
+ *
+ * For the workaround to work, we query the physical size, which is
+ * page aligned, to calculate end_offset so that the last vertex has
+ * a better chance to be fetched.
+ */
+ const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
+
+ dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
+
+ ilo_cp_write(cp, dw);
+ ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+ ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+ ilo_cp_write(cp, instance_divisor);
+ }
+ else {
+ /* NOTE(review): bit 13 is presumably the null-vertex-buffer flag */
+ dw |= 1 << 13;
+
+ ilo_cp_write(cp, dw);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, instance_divisor);
+ }
+ }
+
+ ilo_cp_end(cp);
+}
+
+
+/**
+ * Initialize a vertex element CSO that has no source format: payload[0] is
+ * just the valid bit, and payload[1] holds the four given component
+ * controls.
+ */
+static inline void
+ve_init_cso_with_components(const struct ilo_dev_info *dev,
+                            int comp0, int comp1, int comp2, int comp3,
+                            struct ilo_ve_cso *cso)
+{
+   int controls;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   controls = comp0 << BRW_VE1_COMPONENT_0_SHIFT |
+              comp1 << BRW_VE1_COMPONENT_1_SHIFT |
+              comp2 << BRW_VE1_COMPONENT_2_SHIFT |
+              comp3 << BRW_VE1_COMPONENT_3_SHIFT;
+
+   cso->payload[0] = GEN6_VE0_VALID;
+   cso->payload[1] = controls;
+}
+
+
+/**
+ * Turn a vertex element CSO into an edge flag element, in place.
+ *
+ * The edge flag enable bit is set in payload[0], payload[1] is overwritten
+ * so that only component 0 is stored, and a source format of R32_FLOAT is
+ * rewritten as R32_UINT.  Any other source format must be R8_UINT.
+ */
+static inline void
+ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
+ struct ilo_ve_cso *cso)
+{
+ int format;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+ *
+ * "- This bit (Edge Flag Enable) must only be ENABLED on the last
+ * valid VERTEX_ELEMENT structure.
+ *
+ * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+ * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+ *
+ * - The Source Element Format must be set to the UINT format.
+ *
+ * - [DevSNB]: Edge Flags are not supported for QUADLIST
+ * primitives. Software may elect to convert QUADLIST primitives
+ * to some set of corresponding edge-flag-supported primitive
+ * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+ */
+
+ cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
+ cso->payload[1] =
+ BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+ BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
+ BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
+ BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
+
+ /*
+ * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
+ * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
+ * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+ *
+ * Since all the hardware cares about is whether the flags are zero or not,
+ * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
+ */
+ format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
+ if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
+ /* R32_UINT is exactly one below R32_FLOAT, so decrement the field */
+ STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
+ BRW_SURFACEFORMAT_R32_FLOAT - 1);
+
+ cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
+ }
+ else {
+ assert(format == BRW_SURFACEFORMAT_R8_UINT);
+ }
+}
+
+
+/**
+ * Emit 3DSTATE_VERTEX_ELEMENTS with two DWords per vertex element.
+ *
+ * \param last_velement_edgeflag  emit the last element of \p ve as an edge
+ *                                flag element (a modified copy is emitted;
+ *                                \p ve itself is not changed)
+ * \param prepend_generated_ids   emit an extra leading element that stores
+ *                                the generated vertex and instance IDs
+ *
+ * When there are no elements at all, a single dummy element storing
+ * (0, 0, 0, 1.0f) is emitted instead.
+ *
+ * NOTE(review): with \p last_velement_edgeflag set, the code indexes
+ * ve->cso[ve->count - 1], so ve->count is presumably non-zero on that path.
+ */
+static inline void
+gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
+ const struct ilo_ve_state *ve,
+ bool last_velement_edgeflag,
+ bool prepend_generated_ids,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
+ uint8_t cmd_len;
+ unsigned i;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 93:
+ *
+ * "Up to 34 (DevSNB+) vertex elements are supported."
+ */
+ assert(ve->count + prepend_generated_ids <= 34);
+
+ /* nothing to emit: fall back to one dummy element */
+ if (!ve->count && !prepend_generated_ids) {
+ struct ilo_ve_cso dummy;
+
+ ve_init_cso_with_components(dev,
+ BRW_VE1_COMPONENT_STORE_0,
+ BRW_VE1_COMPONENT_STORE_0,
+ BRW_VE1_COMPONENT_STORE_0,
+ BRW_VE1_COMPONENT_STORE_1_FLT,
+ &dummy);
+
+ cmd_len = 3;
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write_multi(cp, dummy.payload, 2);
+ ilo_cp_end(cp);
+
+ return;
+ }
+
+ /* one header DWord plus two DWords per element */
+ cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+ if (prepend_generated_ids) {
+ struct ilo_ve_cso gen_ids;
+
+ ve_init_cso_with_components(dev,
+ BRW_VE1_COMPONENT_STORE_VID,
+ BRW_VE1_COMPONENT_STORE_IID,
+ BRW_VE1_COMPONENT_NOSTORE,
+ BRW_VE1_COMPONENT_NOSTORE,
+ &gen_ids);
+
+ ilo_cp_write_multi(cp, gen_ids.payload, 2);
+ }
+
+ if (last_velement_edgeflag) {
+ struct ilo_ve_cso edgeflag;
+
+ /* all but the last element are emitted unchanged */
+ for (i = 0; i < ve->count - 1; i++)
+ ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+
+ /* i now indexes the last element; patch a local copy of it */
+ edgeflag = ve->cso[i];
+ ve_set_cso_edgeflag(dev, &edgeflag);
+ ilo_cp_write_multi(cp, edgeflag.payload, 2);
+ }
+ else {
+ for (i = 0; i < ve->count; i++)
+ ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+ }
+
+ ilo_cp_end(cp);
+}
+
+
+/**
+ * Convert an index size in bytes (1, 2 or 4) into the hardware index
+ * format.  Any other size asserts and falls back to BRW_INDEX_BYTE.
+ */
+static inline int
+gen6_translate_index_size(int size)
+{
+   int hw_index;
+
+   switch (size) {
+   case 1:
+      hw_index = BRW_INDEX_BYTE;
+      break;
+   case 2:
+      hw_index = BRW_INDEX_WORD;
+      break;
+   case 4:
+      hw_index = BRW_INDEX_DWORD;
+      break;
+   default:
+      assert(!"unknown index size");
+      hw_index = BRW_INDEX_BYTE;
+      break;
+   }
+
+   return hw_index;
+}
+
+
+/**
+ * Emit the 3-DWord 3DSTATE_INDEX_BUFFER command for \p ib.  Nothing is
+ * emitted when \p ib has no hardware resource.
+ *
+ * The buffer is always bound from offset 0 up to the last whole index that
+ * fits in buf->bo_size; \p enable_cut_index sets the cut index enable bit in
+ * the command DWord.
+ */
+static inline void
+gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
+ const struct ilo_ib_state *ib,
+ bool enable_cut_index,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
+ const uint8_t cmd_len = 3;
+ struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
+ uint32_t start_offset, end_offset;
+ int format;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /* no index buffer bound */
+ if (!buf)
+ return;
+
+ format = gen6_translate_index_size(ib->hw_index_size);
+
+ /*
+ * set start_offset to 0 here and adjust pipe_draw_info::start with
+ * ib->draw_start_offset in 3DPRIMITIVE
+ */
+ start_offset = 0;
+ end_offset = buf->bo_size;
+
+ /* end_offset must also be aligned and is inclusive */
+ end_offset -= (end_offset % ib->hw_index_size);
+ end_offset--;
+
+ /* the index format is placed at bit 8 of the command DWord */
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2) |
+ ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
+ format << 8);
+ ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+ ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
+ ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 4-DWord 3DSTATE_VIEWPORT_STATE_POINTERS command (GEN6 only).
+ * The CLIP, SF and CC modify bits are always set, so all three viewport
+ * pointers are updated by every emission.
+ */
+static inline void
+gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                          uint32_t clip_viewport,
+                                          uint32_t sf_viewport,
+                                          uint32_t cc_viewport,
+                                          struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = cmd | (cmd_len - 2) |
+         GEN6_CLIP_VIEWPORT_MODIFY |
+         GEN6_SF_VIEWPORT_MODIFY |
+         GEN6_CC_VIEWPORT_MODIFY;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, clip_viewport);
+   ilo_cp_write(cp, sf_viewport);
+   ilo_cp_write(cp, cc_viewport);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit the 4-DWord 3DSTATE_CC_STATE_POINTERS command (GEN6 only).  Each of
+ * the three state offsets is written with its low bit set.
+ */
+static inline void
+gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                    uint32_t blend_state,
+                                    uint32_t depth_stencil_state,
+                                    uint32_t color_calc_state,
+                                    struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 4;
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
+   uint32_t blend_dw, dsa_dw, cc_dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   blend_dw = blend_state | 1;
+   dsa_dw = depth_stencil_state | 1;
+   cc_dw = color_calc_state | 1;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, blend_dw);
+   ilo_cp_write(cp, dsa_dw);
+   ilo_cp_write(cp, cc_dw);
+   ilo_cp_end(cp);
+}
+
+
+/**
+ * Emit 3DSTATE_SCISSOR_STATE_POINTERS carrying the offset of the
+ * SCISSOR_RECT array.
+ */
+static inline void
+gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t scissor_rect,
+                                         struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
+   const uint8_t cmd_len = 2;
+   const uint32_t header = cmd | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, scissor_rect);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VS.
+ *
+ * When \p vs is NULL every dword after the header is zero.  Otherwise the
+ * kernel offset and the three payload dwords of the kernel CSO are emitted,
+ * with the sampler count (in units of four samplers) merged into DW2.
+ */
+static inline void
+gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *vs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
+   const uint8_t cmd_len = 6;
+   uint32_t dw2 = 0, dw4 = 0, dw5 = 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (vs) {
+      const struct ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vs);
+
+      dw2 = cso->payload[0];
+      dw4 = cso->payload[1];
+      dw5 = cso->payload[2];
+
+      dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, (vs) ? ilo_shader_get_kernel_offset(vs) : 0);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, dw5);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_GS (GEN6 only).
+ *
+ * Three cases are handled:
+ *  - \p gs is set: its kernel offset and CSO payload are used;
+ *  - no GS, but \p vs reports ILO_KERNEL_VS_GEN6_SO: the kernel is located
+ *    at the VS kernel offset plus a per-primitive-type offset, and a GS CSO
+ *    is initialized on the fly from the VS;
+ *  - otherwise: a fixed set of dwords with no kernel is emitted.
+ */
+static inline void
+gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *gs,
+                     const struct ilo_shader_state *vs,
+                     int verts_per_prim,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
+   const uint8_t cmd_len = 7;
+   uint32_t dw1, dw2, dw4, dw5, dw6;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   if (gs) {
+      const struct ilo_shader_cso *gs_cso;
+
+      dw1 = ilo_shader_get_kernel_offset(gs);
+
+      gs_cso = ilo_shader_get_kernel_cso(gs);
+      dw2 = gs_cso->payload[0];
+      dw4 = gs_cso->payload[1];
+      dw5 = gs_cso->payload[2];
+      dw6 = gs_cso->payload[3];
+   }
+   else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
+      struct ilo_shader_cso so_cso;
+      /* 1 vertex -> point, 2 vertices -> line, anything else -> triangle */
+      const enum ilo_kernel_param offset_param =
+         (verts_per_prim == 1) ? ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET :
+         (verts_per_prim == 2) ? ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET :
+                                 ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+
+      dw1 = ilo_shader_get_kernel_offset(vs) +
+            ilo_shader_get_kernel_param(vs, offset_param);
+
+      /* cannot use VS's CSO */
+      ilo_gpe_init_gs_cso_gen6(dev, vs, &so_cso);
+      dw2 = so_cso.payload[0];
+      dw4 = so_cso.payload[1];
+      dw5 = so_cso.payload[2];
+      dw6 = so_cso.payload[3];
+   }
+   else {
+      dw1 = 0;
+      dw2 = 0;
+      dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
+      dw5 = GEN6_GS_STATISTICS_ENABLE;
+      dw6 = 0;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, dw5);
+   ilo_cp_write(cp, dw6);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CLIP.
+ *
+ * Without a rasterizer state all three payload dwords are zero.  With one,
+ * the pre-computed clip payload is extended with the guardband test (when
+ * both the caller and the rasterizer state allow it), the non-perspective
+ * barycentric enable (when the FS uses any non-perspective interpolation),
+ * and the viewport count.
+ */
+static inline void
+gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
+                       const struct ilo_rasterizer_state *rasterizer,
+                       const struct ilo_shader_state *fs,
+                       bool enable_guardband,
+                       int num_viewports,
+                       struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
+   const uint8_t cmd_len = 4;
+   uint32_t dw1 = 0, dw2 = 0, dw3 = 0;
+
+   if (rasterizer) {
+      const int nonperspective_modes =
+         1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
+         1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
+         1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
+      int interps;
+
+      dw1 = rasterizer->clip.payload[0];
+      dw2 = rasterizer->clip.payload[1];
+      dw3 = rasterizer->clip.payload[2];
+
+      if (enable_guardband && rasterizer->clip.can_enable_guardband)
+         dw2 |= GEN6_CLIP_GB_TEST;
+
+      interps = 0;
+      if (fs) {
+         interps = ilo_shader_get_kernel_param(fs,
+               ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+      }
+
+      if (interps & nonperspective_modes)
+         dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+
+      dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
+             (num_viewports - 1);
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, dw3);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_SF (GEN6 only).
+ *
+ * The 19 payload dwords are assembled from two helper-filled arrays: the
+ * first SBE dword, then the six raster dwords, then the remaining twelve
+ * SBE dwords.
+ */
+static inline void
+gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     const struct ilo_shader_state *fs,
+                     const struct ilo_shader_state *last_sh,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
+   const uint8_t cmd_len = 20;
+   uint32_t raster[6];
+   uint32_t sbe[13];
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
+         1, PIPE_FORMAT_NONE, raster, Elements(raster));
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+         fs, last_sh, sbe, Elements(sbe));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   /* DW1 comes from the SBE payload, followed by the raster dwords */
+   ilo_cp_write(cp, sbe[0]);
+   ilo_cp_write_multi(cp, raster, 6);
+   ilo_cp_write_multi(cp, &sbe[1], 12);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_WM (GEN6 only).
+ *
+ * When \p fs is NULL, a command whose only non-zero payload field is the
+ * maximum thread count is emitted.  Otherwise the FS kernel offset and CSO
+ * payload are combined with the sampler count, the statistics enable bit,
+ * the kill/dispatch enables (when \p cc_may_kill), the dual source blend
+ * enable (when \p dual_blend), and the WM payload of \p rasterizer.
+ *
+ * Note that \p rasterizer is dereferenced unconditionally on the non-NULL
+ * \p fs path.
+ */
+static inline void
+gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ int num_samplers,
+ const struct ilo_rasterizer_state *rasterizer,
+ bool dual_blend, bool cc_may_kill,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
+ const uint8_t cmd_len = 9;
+ /* hard-coded to 1, so the MSAA branch below is never taken */
+ const int num_samples = 1;
+ const struct ilo_shader_cso *fs_cso;
+ uint32_t dw2, dw4, dw5, dw6;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ if (!fs) {
+ /* see brwCreateContext() */
+ const int max_threads = (dev->gt == 2) ? 80 : 40;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ /* honor the valid range even if dispatching is disabled */
+ ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+
+ return;
+ }
+
+ fs_cso = ilo_shader_get_kernel_cso(fs);
+ dw2 = fs_cso->payload[0];
+ dw4 = fs_cso->payload[1];
+ dw5 = fs_cso->payload[2];
+ dw6 = fs_cso->payload[3];
+
+ /* sampler count is expressed in units of four samplers, rounded up */
+ dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+ /*
+ * The condition is a constant: statistics are always enabled and the
+ * else branch (depth clear/resolve bits) is currently dead code.
+ */
+ if (true) {
+ dw4 |= GEN6_WM_STATISTICS_ENABLE;
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+ *
+ * "This bit (Statistics Enable) must be disabled if either of these
+ * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
+ * Resolve Enable or Depth Buffer Resolve Enable."
+ */
+ dw4 |= GEN6_WM_DEPTH_CLEAR;
+ dw4 |= GEN6_WM_DEPTH_RESOLVE;
+ dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+ }
+
+ if (cc_may_kill) {
+ dw5 |= GEN6_WM_KILL_ENABLE |
+ GEN6_WM_DISPATCH_ENABLE;
+ }
+
+ if (dual_blend)
+ dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
+
+ dw5 |= rasterizer->wm.payload[0];
+
+ dw6 |= rasterizer->wm.payload[1];
+
+ if (num_samples > 1) {
+ dw6 |= rasterizer->wm.dw_msaa_rast |
+ rasterizer->wm.dw_msaa_disp;
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, 0); /* scratch */
+ ilo_cp_write(cp, dw4);
+ ilo_cp_write(cp, dw5);
+ ilo_cp_write(cp, dw6);
+ ilo_cp_write(cp, 0); /* kernel 1 */
+ ilo_cp_write(cp, 0); /* kernel 2 */
+ ilo_cp_end(cp);
+}
+
+/**
+ * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_x commands.
+ *
+ * For each of the four slots, dw[i] is set to the buffer offset ORed with
+ * the read length (in 256-bit units, minus one) when the slot is within
+ * \p num_bufs and has a non-zero size, and to zero otherwise.
+ *
+ * \param dev             unused by this helper
+ * \param bufs            buffer offsets, asserted to be 32-byte aligned
+ * \param sizes           buffer sizes in bytes
+ * \param num_bufs        number of valid entries in \p bufs and \p sizes
+ * \param max_read_length asserted upper bound on the summed read lengths
+ * \param dw              output array
+ * \param num_dwords      size of \p dw; asserted to be 4
+ * \return bitmask with bit i set for every enabled slot
+ */
+static inline unsigned
+gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
+                           const uint32_t *bufs, const int *sizes,
+                           int num_bufs, int max_read_length,
+                           uint32_t *dw, int num_dwords)
+{
+   unsigned enabled = 0x0;
+   int total_read_length = 0;
+   int i;
+
+   assert(num_dwords == 4);
+
+   for (i = 0; i < 4; i++) {
+      int read_len;
+
+      /* unused or empty slot */
+      if (i >= num_bufs || !sizes[i]) {
+         dw[i] = 0;
+         continue;
+      }
+
+      /* in 256-bit units minus one */
+      read_len = (sizes[i] + 31) / 32 - 1;
+
+      assert(bufs[i] % 32 == 0);
+      assert(read_len < 32);
+
+      dw[i] = bufs[i] | read_len;
+      enabled |= 1 << i;
+
+      total_read_length += read_len + 1;
+   }
+
+   assert(total_read_length <= max_read_length);
+
+   return enabled;
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_VS (GEN6 only) for up to four constant buffers.
+ * The buffer-enable mask is placed at bit 12 of the header dword.
+ */
+static inline void
+gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
+   const uint8_t cmd_len = 5;
+   uint32_t buf_dw[4], buf_enabled;
+   int slot;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 32"
+    */
+   buf_enabled = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
+   for (slot = 0; slot < 4; slot++)
+      ilo_cp_write(cp, buf_dw[slot]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_GS (GEN6 only) for up to four constant buffers.
+ * The buffer-enable mask is placed at bit 12 of the header dword.
+ */
+static inline void
+gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
+   const uint8_t cmd_len = 5;
+   uint32_t buf_dw[4], buf_enabled;
+   int slot;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   buf_enabled = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
+   for (slot = 0; slot < 4; slot++)
+      ilo_cp_write(cp, buf_dw[slot]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_PS (GEN6 only) for up to four constant buffers.
+ * The buffer-enable mask is placed at bit 12 of the header dword.
+ */
+static inline void
+gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
+   const uint8_t cmd_len = 5;
+   uint32_t buf_dw[4], buf_enabled;
+   int slot;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   buf_enabled = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
+   for (slot = 0; slot < 4; slot++)
+      ilo_cp_write(cp, buf_dw[slot]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLE_MASK (GEN6 only).  Only the low four bits of
+ * \p sample_mask are passed on to the hardware.
+ */
+static inline void
+gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+                              unsigned sample_mask,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
+   const uint8_t cmd_len = 2;
+   const unsigned valid_mask = 0xf;
+   const unsigned masked = sample_mask & valid_mask;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, masked);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_DRAWING_RECTANGLE.
+ *
+ * The rectangle is given by its origin and size; the inclusive maximum
+ * coordinates are derived from them.  All four coordinates are clamped to
+ * the per-generation limit (16383 on GEN7+, 8191 otherwise).  The drawing
+ * rectangle origin dword is always zero.
+ */
+static inline void
+gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
+                                    unsigned x, unsigned y,
+                                    unsigned width, unsigned height,
+                                    struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
+   const uint8_t cmd_len = 4;
+   unsigned xmax = x + width - 1;
+   unsigned ymax = y + height - 1;
+   unsigned rect_limit = 16383;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (dev->gen < ILO_GEN(7)) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
+       *
+       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
+       *      must be an even number"
+       */
+      assert(y % 2 == 0);
+
+      rect_limit = 8191;
+   }
+
+   x = (x > rect_limit) ? rect_limit : x;
+   y = (y > rect_limit) ? rect_limit : y;
+   xmax = (xmax > rect_limit) ? rect_limit : xmax;
+   ymax = (ymax > rect_limit) ? rect_limit : ymax;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, y << 16 | x);
+   ilo_cp_write(cp, ymax << 16 | xmax);
+
+   /*
+    * There is no need to set the origin.  It is intended to support front
+    * buffer rendering.
+    */
+   ilo_cp_write(cp, 0);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_DEPTH_BUFFER from the first six payload dwords of \p zs.
+ * payload[1] is emitted as a relocation against zs->bo; the command opcode
+ * differs between GEN6 and GEN7.
+ */
+static inline void
+gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                               const struct ilo_zs_surface *zs,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
+      ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
+   const uint8_t cmd_len = 7;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, zs->payload[0]);
+   ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
+         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   /* payload[2] through payload[5] are plain dwords */
+   for (i = 2; i <= 5; i++)
+      ilo_cp_write(cp, zs->payload[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_POLY_STIPPLE_OFFSET.  Both offsets are asserted to be in
+ * [0, 31]; the X offset is placed at bit 8 and the Y offset at bit 0.
+ */
+static inline void
+gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
+                                      int x_offset, int y_offset,
+                                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
+   const uint8_t cmd_len = 2;
+   uint32_t dw1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(x_offset >= 0 && x_offset <= 31);
+   assert(y_offset >= 0 && y_offset <= 31);
+
+   dw1 = x_offset << 8 | y_offset;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_POLY_STIPPLE_PATTERN: the header followed by the 32 stipple
+ * dwords of \p pattern, in order.
+ */
+static inline void
+gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
+                                       const struct pipe_poly_stipple *pattern,
+                                       struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
+   const uint8_t cmd_len = 33;
+   int row = 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(Elements(pattern->stipple) == 32);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   while (row < 32) {
+      ilo_cp_write(cp, pattern->stipple[row]);
+      row++;
+   }
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_LINE_STIPPLE.
+ *
+ * \p pattern is asserted to fit in 16 bits and \p factor to be in
+ * [1, 256].  DW2 carries the repeat factor together with its inverse,
+ * encoded as U1.16 at bit 15 on GEN7+ and as U1.13 at bit 16 otherwise.
+ */
+static inline void
+gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
+                               unsigned pattern, unsigned factor,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
+   const uint8_t cmd_len = 3;
+   unsigned inverse, dw2;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert((pattern & 0xffff) == pattern);
+   assert(factor >= 1 && factor <= 256);
+
+   if (dev->gen >= ILO_GEN(7)) {
+      /* in U1.16 */
+      inverse = (unsigned) (65536.0f / factor);
+      dw2 = inverse << 15 | factor;
+   }
+   else {
+      /* in U1.13 */
+      inverse = (unsigned) (8192.0f / factor);
+      dw2 = inverse << 16 | factor;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, pattern);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_AA_LINE_PARAMETERS with both parameter dwords set to zero.
+ */
+static inline void
+gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
+                                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
+   const uint8_t cmd_len = 3;
+   /* each dword packs two fields (at bits 16 and 0); both are zero */
+   const uint32_t dw1 = 0 << 16 | 0;
+   const uint32_t dw2 = 0 << 16 | 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_GS_SVB_INDEX (GEN6 only) for the streamed vertex buffer
+ * index \p index, which is asserted to be in [0, 3].
+ */
+static inline void
+gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
+                               int index, unsigned svbi,
+                               unsigned max_svbi,
+                               bool load_vertex_count,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
+   const uint8_t cmd_len = 4;
+   uint32_t dw1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(index >= 0 && index < 4);
+
+   dw1 = (load_vertex_count) ? SVB_LOAD_INTERNAL_VERTEX_COUNT : 0;
+   dw1 |= index << SVB_INDEX_SHIFT;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, svbi);
+   ilo_cp_write(cp, max_svbi);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_MULTISAMPLE.
+ *
+ * The command is 4 dwords on GEN7+ and 3 dwords otherwise.  Sample counts
+ * of 0 and 1 both select single sampling; 4 uses packed_sample_pos[0]; 8
+ * (asserted to be GEN7+) additionally uses packed_sample_pos[1].  Any other
+ * count trips an assertion and falls back to single sampling.
+ */
+static inline void
+gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
+                              int num_samples,
+                              const uint32_t *packed_sample_pos,
+                              bool pixel_location_center,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
+   const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
+   uint32_t dw1, dw2 = 0, dw3 = 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (pixel_location_center)
+      dw1 = MS_PIXEL_LOCATION_CENTER;
+   else
+      dw1 = MS_PIXEL_LOCATION_UPPER_LEFT;
+
+   switch (num_samples) {
+   case 8:
+      assert(dev->gen >= ILO_GEN(7));
+      dw1 |= MS_NUMSAMPLES_8;
+      dw2 = packed_sample_pos[0];
+      dw3 = packed_sample_pos[1];
+      break;
+   case 4:
+      dw1 |= MS_NUMSAMPLES_4;
+      dw2 = packed_sample_pos[0];
+      break;
+   case 1:
+   case 0:
+      dw1 |= MS_NUMSAMPLES_1;
+      break;
+   default:
+      assert(!"unsupported sample count");
+      dw1 |= MS_NUMSAMPLES_1;
+      break;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   /* the second sample position dword exists only on GEN7+ */
+   if (dev->gen >= ILO_GEN(7))
+      ilo_cp_write(cp, dw3);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_STENCIL_BUFFER from payload[6] and payload[7] of \p zs,
+ * with payload[7] relocated against zs->separate_s8_bo.  The opcode differs
+ * between GEN6 and GEN7.
+ */
+static inline void
+gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
+                                 const struct ilo_zs_surface *zs,
+                                 struct ilo_cp *cp)
+{
+   const bool is_gen7 = (dev->gen >= ILO_GEN(7));
+   const uint32_t cmd = (is_gen7) ?
+      ILO_GPE_CMD(0x3, 0x0, 0x06) :
+      ILO_GPE_CMD(0x3, 0x1, 0x0e);
+   const uint8_t cmd_len = 3;
+   const uint32_t header = cmd | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   /* see ilo_gpe_init_zs_surface() */
+   ilo_cp_write(cp, zs->payload[6]);
+   ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
+         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_HIER_DEPTH_BUFFER from payload[8] and payload[9] of \p zs,
+ * with payload[9] relocated against zs->hiz_bo.  The opcode differs between
+ * GEN6 and GEN7.
+ */
+static inline void
+gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                                    const struct ilo_zs_surface *zs,
+                                    struct ilo_cp *cp)
+{
+   const bool is_gen7 = (dev->gen >= ILO_GEN(7));
+   const uint32_t cmd = (is_gen7) ?
+      ILO_GPE_CMD(0x3, 0x0, 0x07) :
+      ILO_GPE_CMD(0x3, 0x1, 0x0f);
+   const uint8_t cmd_len = 3;
+   const uint32_t header = cmd | (cmd_len - 2);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   /* see ilo_gpe_init_zs_surface() */
+   ilo_cp_write(cp, zs->payload[8]);
+   ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
+         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CLEAR_PARAMS (GEN6 only) with \p clear_val as the depth
+ * clear value; the header always carries GEN5_DEPTH_CLEAR_VALID.
+ */
+static inline void
+gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
+                               uint32_t clear_val,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
+   const uint8_t cmd_len = 2;
+   const uint32_t header = cmd | (cmd_len - 2) | GEN5_DEPTH_CLEAR_VALID;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, clear_val);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit PIPE_CONTROL.
+ *
+ * \param dw1         flag bits written verbatim as DW1
+ * \param bo          buffer object relocated in DW2
+ * \param bo_offset   offset passed along with \p bo for the relocation
+ * \param write_qword when true the command is 5 dwords long (one extra zero
+ *                    dword is appended), otherwise 4
+ *
+ * In builds with assertions enabled, the flag combinations restricted by
+ * the PRM passages quoted below are checked before anything is emitted.
+ */
+static inline void
+gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
+ uint32_t dw1,
+ struct intel_bo *bo, uint32_t bo_offset,
+ bool write_qword,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
+ const uint8_t cmd_len = (write_qword) ? 5 : 4;
+ const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
+ const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ if (dw1 & PIPE_CONTROL_CS_STALL) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+ *
+ * "1 of the following must also be set (when CS stall is set):
+ *
+ * * Depth Cache Flush Enable ([0] of DW1)
+ * * Stall at Pixel Scoreboard ([1] of DW1)
+ * * Depth Stall ([13] of DW1)
+ * * Post-Sync Operation ([13] of DW1)
+ * * Render Target Cache Flush Enable ([12] of DW1)
+ * * Notify Enable ([8] of DW1)"
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 61:
+ *
+ * "One of the following must also be set (when CS stall is set):
+ *
+ * * Render Target Cache Flush Enable ([12] of DW1)
+ * * Depth Cache Flush Enable ([0] of DW1)
+ * * Stall at Pixel Scoreboard ([1] of DW1)
+ * * Depth Stall ([13] of DW1)
+ * * Post-Sync Operation ([13] of DW1)"
+ */
+ /* flags accepted on both generations */
+ uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_DEPTH_STALL;
+
+ /* post-sync op */
+ bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP;
+
+ /* the interrupt/notify flag is accepted only on GEN6, matching the quotes above */
+ if (dev->gen == ILO_GEN(6))
+ bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
+
+ assert(dw1 & bit_test);
+ }
+
+ if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+ *
+ * "Following bits must be clear (when Depth Stall is set):
+ *
+ * * Render Target Cache Flush Enable ([12] of DW1)
+ * * Depth Cache Flush Enable ([0] of DW1)"
+ */
+ assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, dw1);
+ ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
+ ilo_cp_write(cp, 0);
+ /* second zero dword, present only in the 5-dword form */
+ if (write_qword)
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DPRIMITIVE (GEN6 only) for the draw described by \p info.
+ *
+ * With \p rectlist set the topology is _3DPRIM_RECTLIST, otherwise it is
+ * translated from info->mode.  For indexed draws the vertex buffer access
+ * is random and ib->draw_start_offset is added to the start index; \p ib is
+ * not read for non-indexed draws.
+ */
+static inline void
+gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+                      const struct pipe_draw_info *info,
+                      const struct ilo_ib_state *ib,
+                      bool rectlist,
+                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
+   const uint8_t cmd_len = 6;
+   const int topology = (rectlist) ?
+      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
+   const int access_mode = (info->indexed) ?
+      GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
+      GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+   const uint32_t first_vertex = info->start +
+      ((info->indexed) ? ib->draw_start_offset : 0);
+   const uint32_t header = cmd | (cmd_len - 2) |
+      topology << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+      access_mode;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, info->count);
+   ilo_cp_write(cp, first_vertex);
+   ilo_cp_write(cp, info->instance_count);
+   ilo_cp_write(cp, info->start_instance);
+   ilo_cp_write(cp, info->index_bias);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Write \p num_ids interface descriptors (8 dwords each) into space stolen
+ * from \p cp and return the offset of the first one.
+ *
+ * All per-descriptor arrays (\p cs, \p sampler_state, \p num_samplers,
+ * \p binding_table_state, \p num_surfaces) are indexed by descriptor.
+ */
+static inline uint32_t
+gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
+                                    const struct ilo_shader_state **cs,
+                                    uint32_t *sampler_state,
+                                    int *num_samplers,
+                                    uint32_t *binding_table_state,
+                                    int *num_surfaces,
+                                    int num_ids,
+                                    struct ilo_cp *cp)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
+    *
+    *     "(Interface Descriptor Total Length) This field must have the same
+    *      alignment as the Interface Descriptor Data Start Address.
+    *
+    *      It must be DQWord (32-byte) aligned..."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
+    *
+    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
+    *      aligned address of the Interface Descriptor data."
+    */
+   const int state_align = 32 / 4;
+   const int state_len = (32 / 4) * num_ids;
+   uint32_t state_offset, *dw;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_ids; i++) {
+      uint32_t *desc = dw + 8 * i;
+      /* sampler count is in units of four samplers, rounded up */
+      const int sampler_count = (num_samplers[i] + 3) / 4;
+
+      desc[0] = ilo_shader_get_kernel_offset(cs[i]);
+      desc[1] = 1 << 18; /* SPF */
+      desc[2] = sampler_state[i] | (sampler_count << 2);
+      desc[3] = binding_table_state[i] | num_surfaces[i];
+      desc[4] = 0 << 16 |  /* CURBE Read Length */
+                0;         /* CURBE Read Offset */
+      desc[5] = 0; /* Barrier ID */
+      desc[6] = 0;
+      desc[7] = 0;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports SF_VIEWPORT elements (8 dwords each, GEN6 only)
+ * into space stolen from \p cp and return the offset of the array.
+ *
+ * Each element holds the m00, m11, m22, m30, m31 and m32 fields of the
+ * viewport CSO as raw float bits, followed by two zero dwords.
+ */
+static inline uint32_t
+gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 8 * num_viewports;
+   uint32_t state_offset, *dw;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
+    *
+    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = viewports + i;
+      uint32_t *elem = dw + 8 * i;
+
+      elem[0] = fui(vp->m00);
+      elem[1] = fui(vp->m11);
+      elem[2] = fui(vp->m22);
+      elem[3] = fui(vp->m30);
+      elem[4] = fui(vp->m31);
+      elem[5] = fui(vp->m32);
+      elem[6] = 0;
+      elem[7] = 0;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports CLIP_VIEWPORT elements (4 dwords each, GEN6 only)
+ * into space stolen from \p cp and return the offset of the array.
+ *
+ * Each element holds the guardband extents of the viewport CSO
+ * (min_gbx, max_gbx, min_gby, max_gby) as raw float bits.
+ */
+static inline uint32_t
+gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+                        const struct ilo_viewport_cso *viewports,
+                        unsigned num_viewports,
+                        struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 4 * num_viewports;
+   uint32_t state_offset, *dw;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
+    *
+    *     "The viewport-related state is stored as an array of up to 16
+    *      elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = viewports + i;
+      uint32_t *elem = dw + 4 * i;
+
+      elem[0] = fui(vp->min_gbx);
+      elem[1] = fui(vp->max_gbx);
+      elem[2] = fui(vp->min_gby);
+      elem[3] = fui(vp->max_gby);
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports CC_VIEWPORT elements (2 dwords each) into space
+ * stolen from \p cp and return the offset of the array.
+ *
+ * Each element holds min_z and max_z of the viewport CSO as raw float bits.
+ */
+static inline uint32_t
+gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 2 * num_viewports;
+   uint32_t state_offset, *dw;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
+    *
+    *     "The viewport state is stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = viewports + i;
+
+      dw[2 * i + 0] = fui(vp->min_z);
+      dw[2 * i + 1] = fui(vp->max_z);
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write COLOR_CALC_STATE (6 dwords, 64-byte aligned) into space stolen from
+ * \p cp and return its offset.
+ *
+ * \param stencil_ref front ([0]) and back ([1]) stencil reference values
+ * \param alpha_ref   alpha test reference, converted to UNORM8
+ * \param blend_color constant blend color, emitted as raw float bits
+ */
+static inline uint32_t
+gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
+                           const struct pipe_stencil_ref *stencil_ref,
+                           float alpha_ref,
+                           const struct pipe_blend_color *blend_color,
+                           struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 6;
+   uint32_t state_offset, *dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
+         state_len, state_align, &state_offset);
+
+   /*
+    * The reference values are narrow integers that get promoted to (signed)
+    * int; shifting a value >= 0x80 left by 24 would overflow int, which is
+    * undefined behavior.  Widen to uint32_t before shifting.
+    */
+   dw[0] = (uint32_t) stencil_ref->ref_value[0] << 24 |
+           (uint32_t) stencil_ref->ref_value[1] << 16 |
+           BRW_ALPHATEST_FORMAT_UNORM8;
+   dw[1] = float_to_ubyte(alpha_ref);
+   dw[2] = fui(blend_color->color[0]);
+   dw[3] = fui(blend_color->color[1]);
+   dw[4] = fui(blend_color->color[2]);
+   dw[5] = fui(blend_color->color[3]);
+
+   return state_offset;
+}
+
+/**
+ * Map a pipe depth/stencil/alpha compare function (PIPE_FUNC_x) to the
+ * matching hardware compare function (BRW_COMPAREFUNCTION_x).
+ *
+ * An unknown function trips an assertion and maps to
+ * BRW_COMPAREFUNCTION_NEVER.
+ */
+static int
+gen6_translate_dsa_func(unsigned func)
+{
+   int hw_func;
+
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      hw_func = BRW_COMPAREFUNCTION_NEVER;
+      break;
+   case PIPE_FUNC_LESS:
+      hw_func = BRW_COMPAREFUNCTION_LESS;
+      break;
+   case PIPE_FUNC_EQUAL:
+      hw_func = BRW_COMPAREFUNCTION_EQUAL;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      hw_func = BRW_COMPAREFUNCTION_LEQUAL;
+      break;
+   case PIPE_FUNC_GREATER:
+      hw_func = BRW_COMPAREFUNCTION_GREATER;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      hw_func = BRW_COMPAREFUNCTION_NOTEQUAL;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      hw_func = BRW_COMPAREFUNCTION_GEQUAL;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      hw_func = BRW_COMPAREFUNCTION_ALWAYS;
+      break;
+   default:
+      assert(!"unknown depth/stencil/alpha test function");
+      hw_func = BRW_COMPAREFUNCTION_NEVER;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Write BLEND_STATE (2 dwords per render target, 64-byte aligned) into
+ * space stolen from \p cp and return its offset.
+ *
+ * One element is written per color buffer of \p fb.  When there is no color
+ * buffer, a single element is still written if alpha test is enabled (so
+ * that the alpha function can be referenced); otherwise nothing is written
+ * and 0 is returned.
+ *
+ * The blend CSO used for target i is blend->cso[i] when independent blend
+ * is enabled and blend->cso[0] otherwise.  The color buffer format is looked
+ * up with the same index; a missing or unbound (NULL) color buffer is
+ * treated as a UNORM, non-integer target.
+ */
+static inline uint32_t
+gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
+                      const struct ilo_blend_state *blend,
+                      const struct ilo_fb_state *fb,
+                      const struct pipe_alpha_state *alpha,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   int state_len;
+   uint32_t state_offset, *dw;
+   unsigned num_targets, i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
+    *
+    *     "The blend state is stored as an array of up to 8 elements..."
+    */
+   num_targets = fb->state.nr_cbufs;
+   assert(num_targets <= 8);
+
+   if (!num_targets) {
+      if (!alpha->enabled)
+         return 0;
+      /* to be able to reference alpha func */
+      num_targets = 1;
+   }
+
+   state_len = 2 * num_targets;
+
+   dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_targets; i++) {
+      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
+      const struct ilo_blend_cso *cso = &blend->cso[idx];
+      const int num_samples = fb->num_samples;
+      /*
+       * A slot below nr_cbufs may be unbound (NULL); do not dereference it
+       * and fall back to the defaults below instead.
+       */
+      const struct util_format_description *format_desc =
+         (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
+         util_format_description(fb->state.cbufs[idx]->format) : NULL;
+      bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
+
+      rt_is_unorm = true;
+      rt_is_pure_integer = false;
+      rt_dst_alpha_forced_one = false;
+
+      if (format_desc) {
+         int ch;
+
+         switch (format_desc->format) {
+         case PIPE_FORMAT_B8G8R8X8_UNORM:
+            /* force alpha to one when the HW format has alpha */
+            assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
+                  == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
+            rt_dst_alpha_forced_one = true;
+            break;
+         default:
+            break;
+         }
+
+         for (ch = 0; ch < 4; ch++) {
+            if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
+               continue;
+
+            if (format_desc->channel[ch].pure_integer) {
+               rt_is_unorm = false;
+               rt_is_pure_integer = true;
+               break;
+            }
+
+            if (!format_desc->channel[ch].normalized ||
+                format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
+               rt_is_unorm = false;
+         }
+      }
+
+      dw[0] = cso->payload[0];
+      dw[1] = cso->payload[1];
+
+      /* blending is left disabled for pure integer targets */
+      if (!rt_is_pure_integer) {
+         if (rt_dst_alpha_forced_one)
+            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
+         else
+            dw[0] |= cso->dw_blend;
+      }
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+       *
+       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
+       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
+       *
+       * Since logicop is ignored for non-UNORM color buffers, no special care
+       * is needed.
+       */
+      if (rt_is_unorm)
+         dw[1] |= cso->dw_logicop;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+       *
+       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
+       *      Dither both must be disabled."
+       *
+       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
+       * requires that anyway.
+       */
+      if (num_samples > 1)
+         dw[1] |= cso->dw_alpha_mod;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+       *
+       *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+       *      alpha value."
+       */
+      if (alpha->enabled && !rt_is_pure_integer) {
+         dw[1] |= 1 << 16 |
+                  gen6_translate_dsa_func(alpha->func) << 13;
+      }
+
+      dw += 2;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write DEPTH_STENCIL_STATE (3 dwords, 64-byte aligned) into space stolen
+ * from \p cp by copying the three payload dwords of \p dsa, and return its
+ * offset.
+ */
+static inline uint32_t
+gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
+                              const struct ilo_dsa_state *dsa,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 3;
+   uint32_t state_offset, *dw;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < 3; i++)
+      dw[i] = dsa->payload[i];
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports SCISSOR_RECT elements (2 dwords each) into space
+ * stolen from \p cp by copying them from scissor->payload, and return the
+ * offset of the array.
+ */
+static inline uint32_t
+gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
+                       const struct ilo_scissor_state *scissor,
+                       unsigned num_viewports,
+                       struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 2 * num_viewports;
+   uint32_t state_offset;
+   uint32_t *dst;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
+    *
+    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dst = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
+         state_len, state_align, &state_offset);
+
+   /* state_len is in dwords; memcpy() wants bytes */
+   memcpy(dst, scissor->payload, state_len * 4);
+
+   return state_offset;
+}
+
+/**
+ * Write a binding table of \p num_surface_states entries into space stolen
+ * from \p cp and return its offset.  Each entry is copied verbatim from
+ * \p surface_states.  Returns 0 without writing anything when the table is
+ * empty.
+ */
+static inline uint32_t
+gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
+                              uint32_t *surface_states,
+                              int num_surface_states,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = num_surface_states;
+   uint32_t state_offset, *table;
+   int slot;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "It is stored as an array of up to 256 elements..."
+    */
+   assert(num_surface_states <= 256);
+
+   if (!num_surface_states)
+      return 0;
+
+   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
+         state_len, state_align, &state_offset);
+
+   for (slot = 0; slot < num_surface_states; slot++)
+      table[slot] = surface_states[slot];
+
+   return state_offset;
+}
+
+/**
+ * Emit a SURFACE_STATE from the pre-packed payload of a view surface.
+ *
+ * Six dwords are written on GEN6 and eight on GEN7 and later.  The second
+ * dword is written with ilo_cp_write_bo() together with surf->bo, so that it
+ * is associated with that bo (presumably as a relocation -- TODO confirm).
+ *
+ * for_render selects the domains passed to ilo_cp_write_bo(): render for both
+ * read and write when true, sampler for read and no write domain when false.
+ *
+ * Returns the state offset that ilo_cp_steal() reports for the reserved
+ * space.
+ */
+static inline uint32_t
+gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
+ const struct ilo_view_surface *surf,
+ bool for_render,
+ struct ilo_cp *cp)
+{
+ /* presumably a 32-byte alignment expressed in dwords -- TODO confirm */
+ const int state_align = 32 / 4;
+ const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
+ uint32_t state_offset;
+ uint32_t read_domains, write_domain;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ if (for_render) {
+ read_domains = INTEL_DOMAIN_RENDER;
+ write_domain = INTEL_DOMAIN_RENDER;
+ }
+ else {
+ read_domains = INTEL_DOMAIN_SAMPLER;
+ write_domain = 0;
+ }
+
+ /* the writes below go to the stolen space and are closed by ilo_cp_end() */
+ ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
+
+ /* the GEN7 path reads payload[6] and payload[7] */
+ STATIC_ASSERT(Elements(surf->payload) >= 8);
+
+ ilo_cp_write(cp, surf->payload[0]);
+ ilo_cp_write_bo(cp, surf->payload[1],
+ surf->bo, read_domains, write_domain);
+ ilo_cp_write(cp, surf->payload[2]);
+ ilo_cp_write(cp, surf->payload[3]);
+ ilo_cp_write(cp, surf->payload[4]);
+ ilo_cp_write(cp, surf->payload[5]);
+
+ if (dev->gen >= ILO_GEN(7)) {
+ ilo_cp_write(cp, surf->payload[6]);
+ ilo_cp_write(cp, surf->payload[7]);
+ }
+
+ ilo_cp_end(cp);
+
+ return state_offset;
+}
+
+/**
+ * Emit a SURFACE_STATE for one stream output declaration (GEN6 only).
+ *
+ * A temporary buffer view surface is built for output so_index of so_info on
+ * top of the buffer of so, and then handed to gen6_emit_SURFACE_STATE().  The
+ * element format is a 32-bit float format with as many channels as the
+ * output has components (1 to 4).
+ *
+ * Returns the state offset returned by gen6_emit_SURFACE_STATE().
+ */
+static inline uint32_t
+gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
+ const struct pipe_stream_output_target *so,
+ const struct pipe_stream_output_info *so_info,
+ int so_index,
+ struct ilo_cp *cp)
+{
+ struct ilo_buffer *buf = ilo_buffer(so->buffer);
+ unsigned bo_offset, struct_size;
+ enum pipe_format elem_format;
+ struct ilo_view_surface surf;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ /*
+ * dst_offset and stride are scaled by 4 here, presumably converting
+ * dwords to bytes -- TODO confirm against pipe_stream_output_info.
+ */
+ bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+ struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
+
+ switch (so_info->output[so_index].num_components) {
+ case 1:
+ elem_format = PIPE_FORMAT_R32_FLOAT;
+ break;
+ case 2:
+ elem_format = PIPE_FORMAT_R32G32_FLOAT;
+ break;
+ case 3:
+ elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ break;
+ case 4:
+ elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ break;
+ default:
+ /* with assertions disabled, fall back to a single channel */
+ assert(!"unexpected SO components length");
+ elem_format = PIPE_FORMAT_R32_FLOAT;
+ break;
+ }
+
+ ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
+ struct_size, elem_format, false, true, &surf);
+
+ /*
+ * NOTE(review): for_render is false, so the bo is referenced with the
+ * sampler read domain and no write domain even though this is a stream
+ * output target -- confirm this is intended.
+ */
+ return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
+}
+
+/**
+ * Emit an array of SAMPLER_STATE elements, four dwords per sampler.
+ *
+ * samplers, views and sampler_border_colors are parallel arrays of
+ * num_samplers entries.  A slot whose sampler or view is NULL is emitted as a
+ * disabled sampler.  For the other slots the pre-packed sampler payload is
+ * combined with the filter and wrap words that match the texture target of
+ * the view, and with the border color value of the slot.
+ *
+ * At most 16 samplers are allowed; this is only checked by an assertion.
+ *
+ * Returns the state offset reported by ilo_cp_steal_ptr(), or 0 without
+ * reserving anything when num_samplers is zero.
+ */
+static inline uint32_t
+gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
+ const struct ilo_sampler_cso * const *samplers,
+ const struct pipe_sampler_view * const *views,
+ const uint32_t *sampler_border_colors,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ /* presumably a 32-byte alignment expressed in dwords -- TODO confirm */
+ const int state_align = 32 / 4;
+ const int state_len = 4 * num_samplers;
+ uint32_t state_offset, *dw;
+ int i;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 101:
+ *
+ * "The sampler state is stored as an array of up to 16 elements..."
+ */
+ assert(num_samplers <= 16);
+
+ if (!num_samplers)
+ return 0;
+
+ dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
+ state_len, state_align, &state_offset);
+
+ for (i = 0; i < num_samplers; i++) {
+ const struct ilo_sampler_cso *sampler = samplers[i];
+ const struct pipe_sampler_view *view = views[i];
+ /* read for every slot, including holes */
+ const uint32_t border_color = sampler_border_colors[i];
+ uint32_t dw_filter, dw_wrap;
+
+ /* there may be holes */
+ if (!sampler || !view) {
+ /* disabled sampler */
+ dw[0] = 1 << 31;
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
+ dw += 4;
+
+ continue;
+ }
+
+ /* determine filter and wrap modes */
+ switch (view->texture->target) {
+ case PIPE_TEXTURE_1D:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap_1d;
+ break;
+ case PIPE_TEXTURE_3D:
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+ *
+ * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
+ * surfaces of type SURFTYPE_3D."
+ */
+ /* hence the anisotropic filter word is never used for 3D */
+ dw_filter = sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap_cube;
+ break;
+ default:
+ /* every other target, array targets included, lands here */
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap;
+ break;
+ }
+
+ dw[0] = sampler->payload[0];
+ dw[1] = sampler->payload[1];
+ /* the low five bits of the border color value must be clear */
+ assert(!(border_color & 0x1f));
+ dw[2] = border_color;
+ dw[3] = sampler->payload[2];
+
+ dw[0] |= dw_filter;
+
+ /* the wrap word goes into dw[3] on GEN7 and into dw[1] on GEN6 */
+ if (dev->gen >= ILO_GEN(7)) {
+ dw[3] |= dw_wrap;
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 21:
+ *
+ * "[DevSNB] Errata: Incorrect behavior is observed in cases
+ * where the min and mag mode filters are different and
+ * SurfMinLOD is nonzero. The determination of MagMode uses the
+ * following equation instead of the one in the above
+ * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
+ *
+ * As a way to work around that, we set Base to
+ * view->u.tex.first_level.
+ */
+ dw[0] |= view->u.tex.first_level << 22;
+
+ dw[1] |= dw_wrap;
+ }
+
+ dw += 4;
+ }
+
+ return state_offset;
+}
+
+/**
+ * Emit SAMPLER_BORDER_COLOR_STATE for one sampler.
+ *
+ * The state is copied verbatim from the sampler payload, starting at
+ * payload[3].  It is 12 dwords long on GEN6 and 4 dwords long on GEN7 and
+ * later.
+ *
+ * Returns the state offset that ilo_cp_steal_ptr() reports for the reserved
+ * space.
+ */
+static inline uint32_t
+gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
+ const struct ilo_sampler_cso *sampler,
+ struct ilo_cp *cp)
+{
+ /* presumably a 32-byte alignment expressed in dwords -- TODO confirm */
+ const int state_align = 32 / 4;
+ const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
+ uint32_t state_offset, *dw;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
+ state_len, state_align, &state_offset);
+
+ /* see ilo_gpe_init_sampler_cso() */
+ /* state_len counts dwords; memcpy() wants bytes */
+ memcpy(dw, &sampler->payload[3], state_len * 4);
+
+ return state_offset;
+}
+
+/**
+ * Reserve space for a push constant buffer of size bytes.
+ *
+ * The reserved length is size rounded up with align(size, 32), expressed in
+ * dwords.  Only the padding after the first size bytes is zeroed here; the
+ * first size bytes are not written by this function.  When pcb is not NULL,
+ * the address of the reserved space is stored in *pcb.
+ *
+ * Returns the state offset that ilo_cp_steal_ptr() reports for the reserved
+ * space.
+ */
+static inline uint32_t
+gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
+ int size, void **pcb,
+ struct ilo_cp *cp)
+{
+ /*
+ * For all VS, GS, FS, and CS push constant buffers, they must be aligned
+ * to 32 bytes, and their sizes are specified in 256-bit units.
+ */
+ const int state_align = 32 / 4;
+ const int state_len = align(size, 32) / 4;
+ uint32_t state_offset;
+ char *buf;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7);
+
+ buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
+ state_len, state_align, &state_offset);
+
+ /* zero out the unused range */
+ if (size < state_len * 4)
+ memset(&buf[size], 0, state_len * 4 - size);
+
+ /* the pointer is optional output */
+ if (pcb)
+ *pcb = buf;
+
+ return state_offset;
+}
#endif /* ILO_GPE_GEN6_H */
#include "ilo_shader.h"
#include "ilo_gpe_gen7.h"
-static void
-gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- assert(!"GPGPU_WALKER unsupported");
-}
-
-static void
-gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
- uint32_t clear_val,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
- const uint8_t cmd_len = 3;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, clear_val);
- ilo_cp_write(cp, 1);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
- int subop, uint32_t pointer,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
- const uint8_t cmd_len = 2;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, pointer);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t color_calc_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
-}
-
void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
cso->payload[2] = dw5;
}
-static void
-gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *gs,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
- const uint8_t cmd_len = 7;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- if (!gs) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct pipe_surface *zs_surf,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
- const uint8_t cmd_len = 7;
- const int num_samples = 1;
- uint32_t payload[6];
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
- rasterizer, num_samples,
- (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
- payload, Elements(payload));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, payload, 6);
- ilo_cp_end(cp);
-}
-
void
ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
const struct pipe_rasterizer_state *state,
cso->payload[3] = wm_dw1;
}
-static void
-gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *fs,
- const struct ilo_rasterizer_state *rasterizer,
- bool cc_may_kill,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
- const uint8_t cmd_len = 3;
- const int num_samples = 1;
- uint32_t dw1, dw2;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- /* see ilo_gpe_init_rasterizer_wm() */
- dw1 = rasterizer->wm.payload[0];
- dw2 = rasterizer->wm.payload[1];
-
- dw1 |= GEN7_WM_STATISTICS_ENABLE;
-
- if (false) {
- dw1 |= GEN7_WM_DEPTH_CLEAR;
- dw1 |= GEN7_WM_DEPTH_RESOLVE;
- dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
- }
-
- if (fs) {
- const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
-
- dw1 |= fs_cso->payload[3];
- }
-
- if (cc_may_kill) {
- dw1 |= GEN7_WM_DISPATCH_ENABLE |
- GEN7_WM_KILL_ENABLE;
- }
-
- if (num_samples > 1) {
- dw1 |= rasterizer->wm.dw_msaa_rast;
- dw2 |= rasterizer->wm.dw_msaa_disp;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
- int subop,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
- const uint8_t cmd_len = 7;
- uint32_t dw[6];
- int total_read_length, i;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
-
- assert(num_bufs <= 4);
-
- dw[0] = 0;
- dw[1] = 0;
-
- total_read_length = 0;
- for (i = 0; i < 4; i++) {
- int read_len;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 112:
- *
- * "Constant buffers must be enabled in order from Constant Buffer 0
- * to Constant Buffer 3 within this command. For example, it is
- * not allowed to enable Constant Buffer 1 by programming a
- * non-zero value in the VS Constant Buffer 1 Read Length without a
- * non-zero value in VS Constant Buffer 0 Read Length."
- */
- if (i >= num_bufs || !sizes[i]) {
- for (; i < 4; i++) {
- assert(i >= num_bufs || !sizes[i]);
- dw[2 + i] = 0;
- }
- break;
- }
-
- /* read lengths are in 256-bit units */
- read_len = (sizes[i] + 31) / 32;
- /* the lower 5 bits are used for memory object control state */
- assert(bufs[i] % 32 == 0);
-
- dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
- dw[2 + i] = bufs[i];
-
- total_read_length += read_len;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 113:
- *
- * "The sum of all four read length fields must be less than or equal
- * to the size of 64"
- */
- assert(total_read_length <= 64);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, dw, 6);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
- unsigned sample_mask,
- int num_samples,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
- const uint8_t cmd_len = 2;
- const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 294:
- *
- * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
- * (Sample Mask) must be zero.
- *
- * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
- * must be zero."
- */
- sample_mask &= valid_mask;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, sample_mask);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *hs,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
- const uint8_t cmd_len = 7;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- assert(!hs);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
- const uint8_t cmd_len = 4;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *ds,
- int num_samplers,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
- const uint8_t cmd_len = 6;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- assert(!ds);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
-}
-
-static void
-gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
- unsigned buffer_mask,
- int vertex_attrib_count,
- bool rasterizer_discard,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
- const uint8_t cmd_len = 3;
- const bool enable = (buffer_mask != 0);
- uint32_t dw1, dw2;
- int read_len;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- if (!enable) {
- dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
- if (rasterizer_discard)
- dw1 |= SO_RENDERING_DISABLE;
-
- dw2 = 0;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_end(cp);
- return;
- }
-
- read_len = (vertex_attrib_count + 1) / 2;
- if (!read_len)
- read_len = 1;
-
- dw1 = SO_FUNCTION_ENABLE |
- 0 << SO_RENDER_STREAM_SELECT_SHIFT |
- SO_STATISTICS_ENABLE |
- buffer_mask << 8;
-
- if (rasterizer_discard)
- dw1 |= SO_RENDERING_DISABLE;
-
- /* API_OPENGL */
- if (true)
- dw1 |= SO_REORDER_TRAILING;
-
- dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
- 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
- 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
- 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
- 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
- 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
- 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
- (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, dw1);
- ilo_cp_write(cp, dw2);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- const struct ilo_shader_state *last_sh,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
- const uint8_t cmd_len = 14;
- uint32_t dw[13];
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
- fs, last_sh, dw, Elements(dw));
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write_multi(cp, dw, 13);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
- const struct ilo_shader_state *fs,
- int num_samplers, bool dual_blend,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
- const uint8_t cmd_len = 8;
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- if (!fs) {
- /* see brwCreateContext() */
- const int max_threads = (dev->gt == 2) ? 172 : 48;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- /* GPU hangs if none of the dispatch enable bits is set */
- ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
- GEN7_PS_8_DISPATCH_ENABLE);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
-
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(fs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;
-
- if (dual_blend)
- dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
- ilo_cp_write(cp, dw2);
- ilo_cp_write(cp, 0); /* scratch */
- ilo_cp_write(cp, dw4);
- ilo_cp_write(cp, dw5);
- ilo_cp_write(cp, 0); /* kernel 1 */
- ilo_cp_write(cp, 0); /* kernel 2 */
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
- uint32_t sf_clip_viewport,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
-}
-
-static void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
- uint32_t cc_viewport,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t blend_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
- uint32_t depth_stencil_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
- uint32_t binding_table,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
- uint32_t sampler_state,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
-}
-
-static void
-gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
- int subop, int offset, int size,
- int entry_size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
- const uint8_t cmd_len = 2;
- const int row_size = 64; /* 512 bits */
- int alloc_size, num_entries, min_entries, max_entries;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- /* VS, HS, DS, and GS variants */
- assert(subop >= 0x30 && subop <= 0x33);
-
- /* in multiples of 8KB */
- assert(offset % 8192 == 0);
- offset /= 8192;
-
- /* in multiple of 512-bit rows */
- alloc_size = (entry_size + row_size - 1) / row_size;
- if (!alloc_size)
- alloc_size = 1;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 34:
- *
- * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
- * cause performance to decrease due to banking in the URB. Element
- * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
- */
- if (subop == 0x30 && alloc_size == 5)
- alloc_size = 6;
-
- /* in multiples of 8 */
- num_entries = (size / row_size / alloc_size) & ~7;
-
- switch (subop) {
- case 0x30: /* 3DSTATE_URB_VS */
- min_entries = 32;
- max_entries = (dev->gt == 2) ? 704 : 512;
-
- assert(num_entries >= min_entries);
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case 0x31: /* 3DSTATE_URB_HS */
- max_entries = (dev->gt == 2) ? 64 : 32;
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case 0x32: /* 3DSTATE_URB_DS */
- if (num_entries)
- assert(num_entries >= 138);
- break;
- case 0x33: /* 3DSTATE_URB_GS */
- max_entries = (dev->gt == 2) ? 320 : 192;
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- default:
- break;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
- (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
- num_entries);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
- int offset, int size, int entry_size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
- int subop, int offset, int size,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
- const uint8_t cmd_len = 2;
- int end;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= 0x12 && subop <= 0x16);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 68:
- *
- * "(A table that says the maximum size of each constant buffer is
- * 16KB")
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 115:
- *
- * "The sum of the Constant Buffer Offset and the Constant Buffer Size
- * may not exceed the maximum value of the Constant Buffer Size."
- *
- * Thus, the valid range of buffer end is [0KB, 16KB].
- */
- end = (offset + size) / 1024;
- if (end > 16) {
- assert(!"invalid constant buffer end");
- end = 16;
- }
-
- /* the valid range of buffer offset is [0KB, 15KB] */
- offset = (offset + 1023) / 1024;
- if (offset > 15) {
- assert(!"invalid constant buffer offset");
- offset = 15;
- }
-
- if (offset > end) {
- assert(!size);
- offset = end;
- }
-
- /* the valid range of buffer size is [0KB, 15KB] */
- size = end - offset;
- if (size > 15) {
- assert(!"invalid constant buffer size");
- size = 15;
- }
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
- size);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
- int offset, int size,
- struct ilo_cp *cp)
-{
- gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
- const struct pipe_stream_output_info *so_info,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
- uint16_t cmd_len;
- int buffer_selects, num_entries, i;
- uint16_t so_decls[128];
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- buffer_selects = 0;
- num_entries = 0;
-
- if (so_info) {
- int buffer_offsets[PIPE_MAX_SO_BUFFERS];
-
- memset(buffer_offsets, 0, sizeof(buffer_offsets));
-
- for (i = 0; i < so_info->num_outputs; i++) {
- unsigned decl, buf, reg, mask;
-
- buf = so_info->output[i].output_buffer;
-
- /* pad with holes */
- assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
- while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
- int num_dwords;
-
- num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
- if (num_dwords > 4)
- num_dwords = 4;
-
- decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
- SO_DECL_HOLE_FLAG |
- ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
-
- so_decls[num_entries++] = decl;
- buffer_offsets[buf] += num_dwords;
- }
-
- reg = so_info->output[i].register_index;
- mask = ((1 << so_info->output[i].num_components) - 1) <<
- so_info->output[i].start_component;
-
- decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
- reg << SO_DECL_REGISTER_INDEX_SHIFT |
- mask << SO_DECL_COMPONENT_MASK_SHIFT;
-
- so_decls[num_entries++] = decl;
- buffer_selects |= 1 << buf;
- buffer_offsets[buf] += so_info->output[i].num_components;
- }
- }
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 201:
- *
- * "Errata: All 128 decls for all four streams must be included
- * whenever this command is issued. The "Num Entries [n]" fields still
- * contain the actual numbers of valid decls."
- *
- * Also note that "DWord Length" has 9 bits for this command, and the type
- * of cmd_len is thus uint16_t.
- */
- cmd_len = 2 * 128 + 3;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
- 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
- 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
- buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
- ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
- 0 << SO_NUM_ENTRIES_2_SHIFT |
- 0 << SO_NUM_ENTRIES_1_SHIFT |
- num_entries << SO_NUM_ENTRIES_0_SHIFT);
-
- for (i = 0; i < num_entries; i++) {
- ilo_cp_write(cp, so_decls[i]);
- ilo_cp_write(cp, 0);
- }
- for (; i < 128; i++) {
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- }
-
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
- int index, int base, int stride,
- const struct pipe_stream_output_target *so_target,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
- const uint8_t cmd_len = 4;
- struct ilo_buffer *buf;
- int end;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- if (!so_target || !so_target->buffer) {
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
- ilo_cp_write(cp, 0);
- ilo_cp_write(cp, 0);
- ilo_cp_end(cp);
- return;
- }
-
- buf = ilo_buffer(so_target->buffer);
-
- /* DWord-aligned */
- assert(stride % 4 == 0 && base % 4 == 0);
- assert(so_target->buffer_offset % 4 == 0);
-
- stride &= ~3;
- base = (base + so_target->buffer_offset) & ~3;
- end = (base + so_target->buffer_size) & ~3;
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
- stride);
- ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
- ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib,
- bool rectlist,
- struct ilo_cp *cp)
-{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
- const uint8_t cmd_len = 7;
- const int prim = (rectlist) ?
- _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
- GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- ilo_cp_begin(cp, cmd_len);
- ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, vb_access | prim);
- ilo_cp_write(cp, info->count);
- ilo_cp_write(cp, vb_start);
- ilo_cp_write(cp, info->instance_count);
- ilo_cp_write(cp, info->start_instance);
- ilo_cp_write(cp, info->index_bias);
- ilo_cp_end(cp);
-}
-
-static uint32_t
-gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports,
- struct ilo_cp *cp)
-{
- const int state_align = 64 / 4;
- const int state_len = 16 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_GPE_VALID_GEN(dev, 7, 7);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 270:
- *
- * "The viewport-specific state used by both the SF and CL units
- * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
- * of which contains the DWords described below. The start of each
- * element is spaced 16 DWords apart. The location of first element of
- * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
- * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
- */
- assert(num_viewports && num_viewports <= 16);
-
- dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
- state_len, state_align, &state_offset);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
- dw[8] = fui(vp->min_gbx);
- dw[9] = fui(vp->max_gbx);
- dw[10] = fui(vp->min_gby);
- dw[11] = fui(vp->max_gby);
- dw[12] = 0;
- dw[13] = 0;
- dw[14] = 0;
- dw[15] = 0;
-
- dw += 16;
- }
-
- return state_offset;
-}
-
void
ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
unsigned width, unsigned height,
const struct ilo_gpe_gen7 *
ilo_gpe_gen7_get(void);
+/**
+ * Placeholder for GPGPU_WALKER.  Nothing is written to \p cp; the body only
+ * trips an assertion to report that the command is unsupported (and is
+ * therefore a no-op when assert() is compiled out).
+ */
+static inline void
+gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp)
+{
+ assert(!"GPGPU_WALKER unsupported");
+}
+
+/**
+ * Emit 3DSTATE_CLEAR_PARAMS as a 3-DWord command: the header, \p clear_val,
+ * and a final DWord with the constant value 1.
+ */
+static inline void
+gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
+ uint32_t clear_val,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
+ const uint8_t cmd_len = 3;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, clear_val);
+ ilo_cp_write(cp, 1); /* NOTE(review): presumably the "clear value valid" bit -- confirm */
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit a 2-DWord 3DSTATE command whose opcode is selected by \p subop and
+ * whose single payload DWord is \p pointer.
+ *
+ * This is the common helper behind the gen7_emit_3DSTATE_*_POINTERS*()
+ * wrappers in this file.
+ */
+static inline void
+gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
+ int subop, uint32_t pointer,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
+ const uint8_t cmd_len = 2;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, pointer);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CC_STATE_POINTERS: forwards \p color_calc_state to
+ * gen7_emit_3dstate_pointer() with subop 0x0e.
+ */
+static inline void
+gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+ uint32_t color_calc_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
+}
+
+/**
+ * Emit the 7-DWord 3DSTATE_GS command.
+ *
+ * When \p gs is NULL, every payload DWord is zero except the fifth, which
+ * carries GEN6_GS_STATISTICS_ENABLE.
+ *
+ * Otherwise the kernel offset of \p gs is written first, and DWords 2, 4 and
+ * 5 are taken from the first three payload DWords of the kernel CSO.  The
+ * sampler count, expressed in groups of four samplers (rounded up), is
+ * merged into DWord 2.  The scratch DWord and the last DWord are zero.
+ */
+static inline void
+gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
+ const uint8_t cmd_len = 7;
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ if (!gs) {
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+ return;
+ }
+
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+
+ dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, 0); /* scratch */
+ ilo_cp_write(cp, dw4);
+ ilo_cp_write(cp, dw5);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 7-DWord 3DSTATE_SF command.
+ *
+ * The six payload DWords are produced by ilo_gpe_gen6_fill_3dstate_sf_raster()
+ * from \p rasterizer, a sample count fixed at 1, and the format of \p zs_surf
+ * (PIPE_FORMAT_NONE when \p zs_surf is NULL).
+ */
+static inline void
+gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct pipe_surface *zs_surf,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
+ const uint8_t cmd_len = 7;
+ const int num_samples = 1;
+ uint32_t payload[6];
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
+ rasterizer, num_samples,
+ (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
+ payload, Elements(payload));
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write_multi(cp, payload, 6);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 3-DWord 3DSTATE_WM command.
+ *
+ * The two payload DWords start from rasterizer->wm.payload[0] and [1].
+ * GEN7_WM_STATISTICS_ENABLE is always set.  When \p fs is non-NULL, payload
+ * DWord 3 of its kernel CSO is merged into the first DWord.  When
+ * \p cc_may_kill is true, dispatch and kill are enabled as well.
+ *
+ * The depth clear/resolve bits and the MSAA bits sit in branches that are
+ * never taken as written: one is guarded by a literal false and the other by
+ * num_samples, which is fixed at 1.
+ */
+static inline void
+gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ const struct ilo_rasterizer_state *rasterizer,
+ bool cc_may_kill,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
+ const uint8_t cmd_len = 3;
+ const int num_samples = 1;
+ uint32_t dw1, dw2;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ /* see ilo_gpe_init_rasterizer_wm() */
+ dw1 = rasterizer->wm.payload[0];
+ dw2 = rasterizer->wm.payload[1];
+
+ dw1 |= GEN7_WM_STATISTICS_ENABLE;
+
+ if (false) {
+ dw1 |= GEN7_WM_DEPTH_CLEAR;
+ dw1 |= GEN7_WM_DEPTH_RESOLVE;
+ dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+ }
+
+ if (fs) {
+ const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
+
+ dw1 |= fs_cso->payload[3];
+ }
+
+ if (cc_may_kill) {
+ dw1 |= GEN7_WM_DISPATCH_ENABLE |
+ GEN7_WM_KILL_ENABLE;
+ }
+
+ if (num_samples > 1) {
+ dw1 |= rasterizer->wm.dw_msaa_rast;
+ dw2 |= rasterizer->wm.dw_msaa_disp;
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, dw1);
+ ilo_cp_write(cp, dw2);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit a 7-DWord 3DSTATE_CONSTANT_* command selected by \p subop.
+ *
+ * \param bufs      buffer values written to payload DWords 2..5; each one in
+ *                  use is asserted to be a multiple of 32
+ * \param sizes     buffer sizes; converted to read lengths in 256-bit
+ *                  (32-byte) units, rounding up
+ * \param num_bufs  number of valid entries in \p bufs and \p sizes, at most 4
+ *
+ * Read lengths are packed two per DWord into payload DWords 0 and 1 (low half
+ * first).  The first missing or zero-sized buffer ends the list: all later
+ * slots are written as zero, and are asserted to be missing or zero-sized
+ * too.
+ */
+static inline void
+gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
+ int subop,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
+ const uint8_t cmd_len = 7;
+ uint32_t dw[6];
+ int total_read_length, i;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ /* VS, HS, DS, GS, and PS variants */
+ assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
+
+ assert(num_bufs <= 4);
+
+ dw[0] = 0;
+ dw[1] = 0;
+
+ total_read_length = 0;
+ for (i = 0; i < 4; i++) {
+ int read_len;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 112:
+ *
+ * "Constant buffers must be enabled in order from Constant Buffer 0
+ * to Constant Buffer 3 within this command. For example, it is
+ * not allowed to enable Constant Buffer 1 by programming a
+ * non-zero value in the VS Constant Buffer 1 Read Length without a
+ * non-zero value in VS Constant Buffer 0 Read Length."
+ */
+ if (i >= num_bufs || !sizes[i]) {
+ for (; i < 4; i++) {
+ assert(i >= num_bufs || !sizes[i]);
+ dw[2 + i] = 0;
+ }
+ break;
+ }
+
+ /* read lengths are in 256-bit units */
+ read_len = (sizes[i] + 31) / 32;
+ /* the lower 5 bits are used for memory object control state */
+ assert(bufs[i] % 32 == 0);
+
+ dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
+ dw[2 + i] = bufs[i];
+
+ total_read_length += read_len;
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+ *
+ * "The sum of all four read length fields must be less than or equal
+ * to the size of 64"
+ */
+ assert(total_read_length <= 64);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write_multi(cp, dw, 6);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_VS: forwards to gen7_emit_3dstate_constant() with
+ * subop 0x15.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_GS: forwards to gen7_emit_3dstate_constant() with
+ * subop 0x16.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_PS: forwards to gen7_emit_3dstate_constant() with
+ * subop 0x17.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit the 2-DWord 3DSTATE_SAMPLE_MASK command.
+ *
+ * \p sample_mask is restricted to its low \p num_samples bits before being
+ * written; bit 0 is always allowed through, whatever \p num_samples is.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+ unsigned sample_mask,
+ int num_samples,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
+ const uint8_t cmd_len = 2;
+ const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+ *
+ * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+ * (Sample Mask) must be zero.
+ *
+ * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+ * must be zero."
+ */
+ sample_mask &= valid_mask;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, sample_mask);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_HS: forwards to gen7_emit_3dstate_constant() with
+ * subop 0x19.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_DS: forwards to gen7_emit_3dstate_constant() with
+ * subop 0x1a.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit the 7-DWord 3DSTATE_HS command with an all-zero payload.
+ *
+ * \p hs is asserted to be NULL and \p num_samplers is not used.
+ */
+static inline void
+gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *hs,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
+ const uint8_t cmd_len = 7;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ assert(!hs);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_TE command with an all-zero payload.
+ */
+static inline void
+gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
+ const uint8_t cmd_len = 4;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 6-DWord 3DSTATE_DS command with an all-zero payload.
+ *
+ * \p ds is asserted to be NULL and \p num_samplers is not used.
+ */
+static inline void
+gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *ds,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
+ const uint8_t cmd_len = 6;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ assert(!ds);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+
+}
+
+/**
+ * Emit the 3-DWord 3DSTATE_STREAMOUT command.
+ *
+ * When \p buffer_mask is zero the SO function is not enabled: the first
+ * payload DWord carries at most SO_RENDERING_DISABLE (set when
+ * \p rasterizer_discard is true) and the second is zero.
+ *
+ * Otherwise the function and its statistics are enabled, SO_REORDER_TRAILING
+ * is set, and \p buffer_mask is placed at bit 8.  Only stream 0 gets a read
+ * length: half of \p vertex_attrib_count rounded up, at least 1, stored minus
+ * one.  All fields of streams 1-3 are zero.
+ */
+static inline void
+gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
+ unsigned buffer_mask,
+ int vertex_attrib_count,
+ bool rasterizer_discard,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
+ const uint8_t cmd_len = 3;
+ const bool enable = (buffer_mask != 0);
+ uint32_t dw1, dw2;
+ int read_len;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ if (!enable) {
+ dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
+ if (rasterizer_discard)
+ dw1 |= SO_RENDERING_DISABLE;
+
+ dw2 = 0;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, dw1);
+ ilo_cp_write(cp, dw2);
+ ilo_cp_end(cp);
+ return;
+ }
+
+ read_len = (vertex_attrib_count + 1) / 2;
+ if (!read_len)
+ read_len = 1;
+
+ dw1 = SO_FUNCTION_ENABLE |
+ 0 << SO_RENDER_STREAM_SELECT_SHIFT |
+ SO_STATISTICS_ENABLE |
+ buffer_mask << 8; /* NOTE(review): presumably the per-buffer enable bits -- confirm */
+
+ if (rasterizer_discard)
+ dw1 |= SO_RENDERING_DISABLE;
+
+ /* API_OPENGL */
+ if (true)
+ dw1 |= SO_REORDER_TRAILING;
+
+ dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
+ 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
+ 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
+ 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
+ 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
+ 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
+ 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
+ (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, dw1);
+ ilo_cp_write(cp, dw2);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 14-DWord 3DSTATE_SBE command.
+ *
+ * The 13 payload DWords are produced by ilo_gpe_gen6_fill_3dstate_sf_sbe()
+ * from \p rasterizer, \p fs and \p last_sh.
+ */
+static inline void
+gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ const struct ilo_shader_state *last_sh,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
+ const uint8_t cmd_len = 14;
+ uint32_t dw[13];
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+ fs, last_sh, dw, Elements(dw));
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write_multi(cp, dw, 13);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 8-DWord 3DSTATE_PS command.
+ *
+ * When \p fs is NULL, the payload is zero except for DWord 4, which carries
+ * the maximum thread count minus one (172 threads when dev->gt is 2, 48
+ * otherwise) and GEN7_PS_8_DISPATCH_ENABLE.
+ *
+ * Otherwise the kernel offset of \p fs is written first, and DWords 2, 4 and
+ * 5 are taken from the first three payload DWords of the kernel CSO.  The
+ * sampler count, in groups of four samplers (rounded up), is merged into
+ * DWord 2, and GEN7_PS_DUAL_SOURCE_BLEND_ENABLE into DWord 4 when
+ * \p dual_blend is true.  The scratch DWord and the kernel 1/2 DWords are
+ * zero.
+ */
+static inline void
+gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *fs,
+ int num_samplers, bool dual_blend,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
+ const uint8_t cmd_len = 8;
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ if (!fs) {
+ /* see brwCreateContext() */
+ const int max_threads = (dev->gt == 2) ? 172 : 48;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ /* GPU hangs if none of the dispatch enable bits is set */
+ ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
+ GEN7_PS_8_DISPATCH_ENABLE);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+
+ return;
+ }
+
+ cso = ilo_shader_get_kernel_cso(fs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+
+ dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;
+
+ if (dual_blend)
+ dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
+ ilo_cp_write(cp, dw2);
+ ilo_cp_write(cp, 0); /* scratch */
+ ilo_cp_write(cp, dw4);
+ ilo_cp_write(cp, dw5);
+ ilo_cp_write(cp, 0); /* kernel 1 */
+ ilo_cp_write(cp, 0); /* kernel 2 */
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: forwards \p sf_clip_viewport
+ * to gen7_emit_3dstate_pointer() with subop 0x21.
+ */
+static inline void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
+ uint32_t sf_clip_viewport,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
+}
+
+/**
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC: forwards \p cc_viewport to
+ * gen7_emit_3dstate_pointer() with subop 0x23.
+ */
+static inline void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
+ uint32_t cc_viewport,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
+}
+
+/**
+ * Emit 3DSTATE_BLEND_STATE_POINTERS: forwards \p blend_state to
+ * gen7_emit_3dstate_pointer() with subop 0x24.
+ */
+static inline void
+gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
+ uint32_t blend_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: forwards \p depth_stencil_state
+ * to gen7_emit_3dstate_pointer() with subop 0x25.
+ */
+static inline void
+gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
+ uint32_t depth_stencil_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS: forwards \p binding_table to
+ * gen7_emit_3dstate_pointer() with subop 0x26.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS: forwards \p binding_table to
+ * gen7_emit_3dstate_pointer() with subop 0x27.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS: forwards \p binding_table to
+ * gen7_emit_3dstate_pointer() with subop 0x28.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS: forwards \p binding_table to
+ * gen7_emit_3dstate_pointer() with subop 0x29.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS: forwards \p binding_table to
+ * gen7_emit_3dstate_pointer() with subop 0x2a.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
+ uint32_t binding_table,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS: forwards \p sampler_state to
+ * gen7_emit_3dstate_pointer() with subop 0x2b.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS: forwards \p sampler_state to
+ * gen7_emit_3dstate_pointer() with subop 0x2c.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS: forwards \p sampler_state to
+ * gen7_emit_3dstate_pointer() with subop 0x2d.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS: forwards \p sampler_state to
+ * gen7_emit_3dstate_pointer() with subop 0x2e.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS: forwards \p sampler_state to
+ * gen7_emit_3dstate_pointer() with subop 0x2f.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
+ uint32_t sampler_state,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
+}
+
+/**
+ * Emit a 2-DWord 3DSTATE_URB_* command selected by \p subop (0x30..0x33).
+ *
+ * \param offset      start of the URB region; asserted to be a multiple of
+ *                    8192 and programmed in 8KB units
+ * \param size        size of the URB region; together with the per-entry
+ *                    allocation it determines the number of entries
+ * \param entry_size  size of one entry; rounded up to whole 64-byte
+ *                    (512-bit) rows, with a minimum of one row
+ *
+ * The entry count is rounded down to a multiple of 8 and then limited per
+ * stage: VS is asserted to have at least 32 entries and is capped at 704
+ * (dev->gt == 2) or 512; HS is capped at 64 or 32; GS at 320 or 192.  For DS
+ * a non-zero count is only asserted to be at least 138.
+ */
+static inline void
+gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
+ int subop, int offset, int size,
+ int entry_size,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
+ const uint8_t cmd_len = 2;
+ const int row_size = 64; /* 512 bits */
+ int alloc_size, num_entries, min_entries, max_entries;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ /* VS, HS, DS, and GS variants */
+ assert(subop >= 0x30 && subop <= 0x33);
+
+ /* in multiples of 8KB */
+ assert(offset % 8192 == 0);
+ offset /= 8192;
+
+ /* in multiples of 512-bit rows */
+ alloc_size = (entry_size + row_size - 1) / row_size;
+ if (!alloc_size)
+ alloc_size = 1;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+ *
+ * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+ * cause performance to decrease due to banking in the URB. Element
+ * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+ */
+ if (subop == 0x30 && alloc_size == 5)
+ alloc_size = 6;
+
+ /* in multiples of 8 */
+ num_entries = (size / row_size / alloc_size) & ~7;
+
+ switch (subop) {
+ case 0x30: /* 3DSTATE_URB_VS */
+ min_entries = 32;
+ max_entries = (dev->gt == 2) ? 704 : 512;
+
+ assert(num_entries >= min_entries);
+ if (num_entries > max_entries)
+ num_entries = max_entries;
+ break;
+ case 0x31: /* 3DSTATE_URB_HS */
+ max_entries = (dev->gt == 2) ? 64 : 32;
+ if (num_entries > max_entries)
+ num_entries = max_entries;
+ break;
+ case 0x32: /* 3DSTATE_URB_DS */
+ if (num_entries)
+ assert(num_entries >= 138);
+ break;
+ case 0x33: /* 3DSTATE_URB_GS */
+ max_entries = (dev->gt == 2) ? 320 : 192;
+ if (num_entries > max_entries)
+ num_entries = max_entries;
+ break;
+ default:
+ break;
+ }
+
+ /* the entry size field holds the row count minus one */
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
+ (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+ num_entries);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_VS: forwards to gen7_emit_3dstate_urb() with subop 0x30.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_HS: forwards to gen7_emit_3dstate_urb() with subop 0x31.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_DS: forwards to gen7_emit_3dstate_urb() with subop 0x32.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_GS: forwards to gen7_emit_3dstate_urb() with subop 0x33.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
+ int offset, int size, int entry_size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit a 2-DWord 3DSTATE_PUSH_CONSTANT_ALLOC_* command selected by \p subop
+ * (0x12..0x16).
+ *
+ * \p offset and \p size are converted to 1024-unit (KB) quantities: the end
+ * of the range is rounded down and the start is rounded up, and the
+ * programmed size is the difference between the two.  Values outside the
+ * ranges noted below trip an assertion and are clamped.
+ */
+static inline void
+gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
+ int subop, int offset, int size,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
+ const uint8_t cmd_len = 2;
+ int end;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ /* VS, HS, DS, GS, and PS variants */
+ assert(subop >= 0x12 && subop <= 0x16);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+ *
+ * "(A table that says the maximum size of each constant buffer is
+ * 16KB)"
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+ *
+ * "The sum of the Constant Buffer Offset and the Constant Buffer Size
+ * may not exceed the maximum value of the Constant Buffer Size."
+ *
+ * Thus, the valid range of buffer end is [0KB, 16KB].
+ */
+ end = (offset + size) / 1024;
+ if (end > 16) {
+ assert(!"invalid constant buffer end");
+ end = 16;
+ }
+
+ /* the valid range of buffer offset is [0KB, 15KB] */
+ offset = (offset + 1023) / 1024;
+ if (offset > 15) {
+ assert(!"invalid constant buffer offset");
+ offset = 15;
+ }
+
+ /* rounding the start up can move it past the rounded-down end */
+ if (offset > end) {
+ assert(!size);
+ offset = end;
+ }
+
+ /* the valid range of buffer size is [0KB, 15KB] */
+ size = end - offset;
+ if (size > 15) {
+ assert(!"invalid constant buffer size");
+ size = 15;
+ }
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
+ size);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS: forwards to
+ * gen7_emit_3dstate_push_constant_alloc() with subop 0x12.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS: forwards to
+ * gen7_emit_3dstate_push_constant_alloc() with subop 0x13.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS: forwards to
+ * gen7_emit_3dstate_push_constant_alloc() with subop 0x14.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS: forwards to
+ * gen7_emit_3dstate_push_constant_alloc() with subop 0x15.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS: forwards to
+ * gen7_emit_3dstate_push_constant_alloc() with subop 0x16.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
+ int offset, int size,
+ struct ilo_cp *cp)
+{
+ gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_SO_DECL_LIST.
+ *
+ * Only stream 0 is described.  Each output of \p so_info becomes one 16-bit
+ * decl.  A gap between the running DWord offset of a buffer and the
+ * dst_offset of the next output is padded with "hole" decls that cover up to
+ * four DWords each.  \p so_info may be NULL, in which case an empty list is
+ * emitted.
+ *
+ * The command always carries 128 decl slots (see the errata quoted below), so
+ * at most 128 decls can be expressed.  Needing more than that trips an
+ * assertion; when assertions are compiled out the list is truncated rather
+ * than written past the end of the local decl array.
+ */
+static inline void
+gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
+                               const struct pipe_stream_output_info *so_info,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
+   uint16_t cmd_len;
+   int buffer_selects, num_entries, i;
+   uint16_t so_decls[128];
+   const int max_entries = (int) Elements(so_decls);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   buffer_selects = 0;
+   num_entries = 0;
+
+   if (so_info) {
+      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+
+      memset(buffer_offsets, 0, sizeof(buffer_offsets));
+
+      for (i = 0; i < so_info->num_outputs; i++) {
+         unsigned decl, buf, reg, mask;
+
+         buf = so_info->output[i].output_buffer;
+         assert(buf < Elements(buffer_offsets));
+
+         /* pad with holes, but never past the end of so_decls */
+         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
+         while (buffer_offsets[buf] < so_info->output[i].dst_offset &&
+                num_entries < max_entries) {
+            int num_dwords;
+
+            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
+            if (num_dwords > 4)
+               num_dwords = 4;
+
+            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                   SO_DECL_HOLE_FLAG |
+                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
+
+            so_decls[num_entries++] = decl;
+            buffer_offsets[buf] += num_dwords;
+         }
+
+         /* one slot must remain for the decl of this output */
+         if (num_entries >= max_entries) {
+            assert(!"too many SO decls");
+            break;
+         }
+
+         reg = so_info->output[i].register_index;
+         mask = ((1 << so_info->output[i].num_components) - 1) <<
+            so_info->output[i].start_component;
+
+         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                reg << SO_DECL_REGISTER_INDEX_SHIFT |
+                mask << SO_DECL_COMPONENT_MASK_SHIFT;
+
+         so_decls[num_entries++] = decl;
+         buffer_selects |= 1 << buf;
+         buffer_offsets[buf] += so_info->output[i].num_components;
+      }
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+    *
+    *     "Errata: All 128 decls for all four streams must be included
+    *      whenever this command is issued. The "Num Entries [n]" fields still
+    *      contain the actual numbers of valid decls."
+    *
+    * Also note that "DWord Length" has 9 bits for this command, and the type
+    * of cmd_len is thus uint16_t.
+    */
+   cmd_len = 2 * max_entries + 3;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
+                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
+   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
+                    0 << SO_NUM_ENTRIES_2_SHIFT |
+                    0 << SO_NUM_ENTRIES_1_SHIFT |
+                    num_entries << SO_NUM_ENTRIES_0_SHIFT);
+
+   /* each slot is two DWords; only the first carries the stream 0 decl */
+   for (i = 0; i < num_entries; i++) {
+      ilo_cp_write(cp, so_decls[i]);
+      ilo_cp_write(cp, 0);
+   }
+   /* zero-fill the remaining slots as required by the errata */
+   for (; i < max_entries; i++) {
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 4-DWord 3DSTATE_SO_BUFFER command for buffer slot \p index.
+ *
+ * When \p so_target is NULL or has no buffer, only the index is programmed
+ * and the remaining two DWords are zero.
+ *
+ * Otherwise \p stride is written next to the index, followed by the start
+ * and end of the range, both emitted with ilo_cp_write_bo() against the bo
+ * of the target buffer.  The start is \p base plus the buffer_offset of the
+ * target, and the end is the start plus its buffer_size.  All three values
+ * are asserted to be DWord-aligned and are masked down to a multiple of 4.
+ */
+static inline void
+gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
+ int index, int base, int stride,
+ const struct pipe_stream_output_target *so_target,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
+ const uint8_t cmd_len = 4;
+ struct ilo_buffer *buf;
+ int end;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ if (!so_target || !so_target->buffer) {
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
+ ilo_cp_write(cp, 0);
+ ilo_cp_write(cp, 0);
+ ilo_cp_end(cp);
+ return;
+ }
+
+ buf = ilo_buffer(so_target->buffer);
+
+ /* DWord-aligned */
+ assert(stride % 4 == 0 && base % 4 == 0);
+ assert(so_target->buffer_offset % 4 == 0);
+
+ stride &= ~3;
+ base = (base + so_target->buffer_offset) & ~3;
+ end = (base + so_target->buffer_size) & ~3;
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
+ stride);
+ ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+ ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Emit the 7-DWord 3DPRIMITIVE command for the draw described by \p info.
+ *
+ * The primitive type is _3DPRIM_RECTLIST when \p rectlist is true, otherwise
+ * the translation of info->mode.  Indexed draws use random vertex buffer
+ * access and add ib->draw_start_offset to info->start; non-indexed draws use
+ * sequential access and do not read \p ib.
+ *
+ * The remaining DWords are, in order: the count, the start computed above,
+ * the instance count, the start instance and the index bias.
+ */
+static inline void
+gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+ const struct pipe_draw_info *info,
+ const struct ilo_ib_state *ib,
+ bool rectlist,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
+ const uint8_t cmd_len = 7;
+ const int prim = (rectlist) ?
+ _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
+ const int vb_access = (info->indexed) ?
+ GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
+ GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+ const uint32_t vb_start = info->start +
+ ((info->indexed) ? ib->draw_start_offset : 0);
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ ilo_cp_begin(cp, cmd_len);
+ ilo_cp_write(cp, cmd | (cmd_len - 2));
+ ilo_cp_write(cp, vb_access | prim);
+ ilo_cp_write(cp, info->count);
+ ilo_cp_write(cp, vb_start);
+ ilo_cp_write(cp, info->instance_count);
+ ilo_cp_write(cp, info->start_instance);
+ ilo_cp_write(cp, info->index_bias);
+ ilo_cp_end(cp);
+}
+
+/**
+ * Write an SF_CLIP_VIEWPORT array for \p num_viewports viewports and return
+ * the state offset reported by ilo_cp_steal_ptr().
+ *
+ * \p num_viewports is asserted to be between 1 and 16.  Each viewport
+ * occupies 16 DWords: the m00/m11/m22/m30/m31/m32 values, two zero DWords,
+ * the min/max guardband values in x and then y, and four zero DWords.  The
+ * float members are stored through fui().
+ */
+static inline uint32_t
+gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+ const struct ilo_viewport_cso *viewports,
+ unsigned num_viewports,
+ struct ilo_cp *cp)
+{
+ const int state_align = 64 / 4; /* NOTE(review): presumably 64 bytes expressed in DWords -- confirm */
+ const int state_len = 16 * num_viewports;
+ uint32_t state_offset, *dw;
+ unsigned i;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 270:
+ *
+ * "The viewport-specific state used by both the SF and CL units
+ * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
+ * of which contains the DWords described below. The start of each
+ * element is spaced 16 DWords apart. The location of first element of
+ * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
+ * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
+ */
+ assert(num_viewports && num_viewports <= 16);
+
+ dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
+ state_len, state_align, &state_offset);
+
+ for (i = 0; i < num_viewports; i++) {
+ const struct ilo_viewport_cso *vp = &viewports[i];
+
+ dw[0] = fui(vp->m00);
+ dw[1] = fui(vp->m11);
+ dw[2] = fui(vp->m22);
+ dw[3] = fui(vp->m30);
+ dw[4] = fui(vp->m31);
+ dw[5] = fui(vp->m32);
+ dw[6] = 0;
+ dw[7] = 0;
+ dw[8] = fui(vp->min_gbx);
+ dw[9] = fui(vp->max_gbx);
+ dw[10] = fui(vp->min_gby);
+ dw[11] = fui(vp->max_gby);
+ dw[12] = 0;
+ dw[13] = 0;
+ dw[14] = 0;
+ dw[15] = 0;
+
+ dw += 16;
+ }
+
+ return state_offset;
+}
+
#endif /* ILO_GPE_GEN7_H */