From 4bc9daf923194c3f31fe7b0f7f5f76ea87dee132 Mon Sep 17 00:00:00 2001 From: Courtney Goeltzenleuchter Date: Mon, 5 Aug 2013 14:17:31 -0600 Subject: [PATCH] ilo: move emit functions so that they can be inlined. --- src/gallium/drivers/ilo/ilo_gpe_gen6.c | 2619 +----------------------- src/gallium/drivers/ilo/ilo_gpe_gen6.h | 2419 +++++++++++++++++++++- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 1044 ---------- src/gallium/drivers/ilo/ilo_gpe_gen7.h | 1044 ++++++++++ 4 files changed, 3556 insertions(+), 3570 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index 1da2925d653..6059276f4d3 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -38,103 +38,6 @@ #include "ilo_state.h" #include "ilo_gpe_gen6.h" -/** - * Translate winsys tiling to hardware tiling. - */ -int -ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling) -{ - switch (tiling) { - case INTEL_TILING_NONE: - return 0; - case INTEL_TILING_X: - return BRW_SURFACE_TILED; - case INTEL_TILING_Y: - return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; - default: - assert(!"unknown tiling"); - return 0; - } -} - -/** - * Translate a pipe primitive type to the matching hardware primitive type. 
- */ -int -ilo_gpe_gen6_translate_pipe_prim(unsigned prim) -{ - static const int prim_mapping[PIPE_PRIM_MAX] = { - [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, - [PIPE_PRIM_LINES] = _3DPRIM_LINELIST, - [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, - [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, - [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, - [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST, - [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, - [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, - [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, - }; - - assert(prim_mapping[prim]); - - return prim_mapping[prim]; -} - -/** - * Translate a pipe texture target to the matching hardware surface type. - */ -int -ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) -{ - switch (target) { - case PIPE_BUFFER: - return BRW_SURFACE_BUFFER; - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return BRW_SURFACE_1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - return BRW_SURFACE_2D; - case PIPE_TEXTURE_3D: - return BRW_SURFACE_3D; - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - return BRW_SURFACE_CUBE; - default: - assert(!"unknown texture target"); - return BRW_SURFACE_BUFFER; - } -} - -/** - * Translate a depth/stencil pipe format to the matching hardware - * format. Return -1 on errors. 
- */ -static int -gen6_translate_depth_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - return BRW_DEPTHFORMAT_D16_UNORM; - case PIPE_FORMAT_Z32_FLOAT: - return BRW_DEPTHFORMAT_D32_FLOAT; - case PIPE_FORMAT_Z24X8_UNORM: - return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT; - default: - return -1; - } -} - /** * Translate a pipe logicop to the matching hardware logicop. */ @@ -294,28 +197,6 @@ gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge) } } -/** - * Translate a pipe DSA test function to the matching hardware compare - * function. - */ -static int -gen6_translate_dsa_func(unsigned func) -{ - switch (func) { - case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER; - case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS; - case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS; - default: - assert(!"unknown depth/stencil/alpha test function"); - return BRW_COMPAREFUNCTION_NEVER; - } -} - /** * Translate a pipe shadow compare function to the matching hardware shadow * function. @@ -345,520 +226,6 @@ gen6_translate_shadow_func(unsigned func) } } -/** - * Translate an index size to the matching hardware index format. 
- */ -static int -gen6_translate_index_size(int size) -{ - switch (size) { - case 4: return BRW_INDEX_DWORD; - case 2: return BRW_INDEX_WORD; - case 1: return BRW_INDEX_BYTE; - default: - assert(!"unknown index size"); - return BRW_INDEX_BYTE; - } -} - -static void -gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev, - struct intel_bo *general_state_bo, - struct intel_bo *surface_state_bo, - struct intel_bo *dynamic_state_bo, - struct intel_bo *indirect_object_bo, - struct intel_bo *instruction_bo, - uint32_t general_state_size, - uint32_t dynamic_state_size, - uint32_t indirect_object_size, - uint32_t instruction_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01); - const uint8_t cmd_len = 10; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* 4K-page aligned */ - assert(((general_state_size | dynamic_state_size | - indirect_object_size | instruction_size) & 0xfff) == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - - ilo_cp_write_bo(cp, 1, general_state_bo, - INTEL_DOMAIN_RENDER, - 0); - ilo_cp_write_bo(cp, 1, surface_state_bo, - INTEL_DOMAIN_SAMPLER, - 0); - ilo_cp_write_bo(cp, 1, dynamic_state_bo, - INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, - 0); - ilo_cp_write_bo(cp, 1, indirect_object_bo, - 0, - 0); - ilo_cp_write_bo(cp, 1, instruction_bo, - INTEL_DOMAIN_INSTRUCTION, - 0); - - if (general_state_size) { - ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo, - INTEL_DOMAIN_RENDER, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 1); - } - - if (dynamic_state_size) { - ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo, - INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 0xfffff000 + 1); - } - - if (indirect_object_size) { - ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo, - 0, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 0xfffff000 + 1); - } - - if (instruction_size) { - 
ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo, - INTEL_DOMAIN_INSTRUCTION, - 0); - } - else { - /* skip range check */ - ilo_cp_write(cp, 1); - } - - ilo_cp_end(cp); -} - -static void -gen6_emit_STATE_SIP(const struct ilo_dev_info *dev, - uint32_t sip, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - ilo_cp_begin(cp, cmd_len | (cmd_len - 2)); - ilo_cp_write(cp, cmd); - ilo_cp_write(cp, sip); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev, - bool enable, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b); - const uint8_t cmd_len = 1; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | enable); - ilo_cp_end(cp); -} - -static void -gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev, - int pipeline, - struct ilo_cp *cp) -{ - const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04); - const uint8_t cmd_len = 1; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* 3D or media */ - assert(pipeline == 0x0 || pipeline == 0x1); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | pipeline); - ilo_cp_end(cp); -} - -static void -gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev, - int max_threads, int num_urb_entries, - int urb_entry_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00); - const uint8_t cmd_len = 8; - uint32_t dw2, dw4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw2 = (max_threads - 1) << 16 | - num_urb_entries << 8 | - 1 << 7 | /* Reset Gateway Timer */ - 1 << 6; /* Bypass Gateway Control */ - - dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */ - 480; /* CURBE Allocation Size */ - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* MBZ */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, 0); /* scoreboard */ - 
ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static void -gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev, - uint32_t buf, int size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - assert(buf % 32 == 0); - /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */ - size = align(size, 32); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); /* MBZ */ - ilo_cp_write(cp, size); - ilo_cp_write(cp, buf); - ilo_cp_end(cp); -} - -static void -gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev, - uint32_t offset, int num_ids, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - assert(offset % 32 == 0); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); /* MBZ */ - /* every ID has 8 DWords */ - ilo_cp_write(cp, num_ids * 8 * 4); - ilo_cp_write(cp, offset); - ilo_cp_end(cp); -} - -static void -gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev, - int id, int byte, int thread_count, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03); - const uint8_t cmd_len = 2; - uint32_t dw1; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw1 = id << 16 | - byte << 8 | - thread_count; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_end(cp); -} - -static void -gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev, - int thread_count_water_mark, - int barrier_mask, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04); - const uint8_t cmd_len = 2; - uint32_t dw1; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - dw1 = thread_count_water_mark << 16 | - barrier_mask; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); 
- ilo_cp_end(cp); -} - -static void -gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - assert(!"MEDIA_OBJECT_WALKER unsupported"); -} - -static void -gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev, - uint32_t vs_binding_table, - uint32_t gs_binding_table, - uint32_t ps_binding_table, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN6_BINDING_TABLE_MODIFY_VS | - GEN6_BINDING_TABLE_MODIFY_GS | - GEN6_BINDING_TABLE_MODIFY_PS); - ilo_cp_write(cp, vs_binding_table); - ilo_cp_write(cp, gs_binding_table); - ilo_cp_write(cp, ps_binding_table); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t vs_sampler_state, - uint32_t gs_sampler_state, - uint32_t ps_sampler_state, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - VS_SAMPLER_STATE_CHANGE | - GS_SAMPLER_STATE_CHANGE | - PS_SAMPLER_STATE_CHANGE); - ilo_cp_write(cp, vs_sampler_state); - ilo_cp_write(cp, gs_sampler_state); - ilo_cp_write(cp, ps_sampler_state); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev, - int vs_total_size, int gs_total_size, - int vs_entry_size, int gs_entry_size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05); - const uint8_t cmd_len = 3; - const int row_size = 128; /* 1024 bits */ - int vs_alloc_size, gs_alloc_size; - int vs_num_entries, gs_num_entries; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - /* in 1024-bit URB rows */ - vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; - gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; - - /* the valid range is [1, 5] */ - if 
(!vs_alloc_size) - vs_alloc_size = 1; - if (!gs_alloc_size) - gs_alloc_size = 1; - assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - - /* the valid range is [24, 256] in multiples of 4 */ - vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; - if (vs_num_entries > 256) - vs_num_entries = 256; - assert(vs_num_entries >= 24); - - /* the valid range is [0, 256] in multiples of 4 */ - gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; - if (gs_num_entries > 256) - gs_num_entries = 256; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT | - vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT); - ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT | - (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, - const struct ilo_ve_state *ve, - const struct ilo_vb_state *vb, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08); - uint8_t cmd_len; - unsigned hw_idx; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 82: - * - * "From 1 to 33 VBs can be specified..." 
- */ - assert(ve->vb_count <= 33); - - if (!ve->vb_count) - return; - - cmd_len = 1 + 4 * ve->vb_count; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - const unsigned instance_divisor = ve->instance_divisors[hw_idx]; - const unsigned pipe_idx = ve->vb_mapping[hw_idx]; - const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; - uint32_t dw; - - dw = hw_idx << GEN6_VB0_INDEX_SHIFT; - - if (instance_divisor) - dw |= GEN6_VB0_ACCESS_INSTANCEDATA; - else - dw |= GEN6_VB0_ACCESS_VERTEXDATA; - - if (dev->gen >= ILO_GEN(7)) - dw |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - /* use null vb if there is no buffer or the stride is out of range */ - if (cso->buffer && cso->stride <= 2048) { - const struct ilo_buffer *buf = ilo_buffer(cso->buffer); - const uint32_t start_offset = cso->buffer_offset; - /* - * As noted in ilo_translate_format(), we treat some 3-component - * formats as 4-component formats to work around hardware - * limitations. Imagine the case where the vertex buffer holds a - * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. - * The hardware would not be able to fetch it because the vertex - * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex - * and that takes at least 8 bytes. - * - * For the workaround to work, we query the physical size, which is - * page aligned, to calculate end_offset so that the last vertex has - * a better chance to be fetched. 
- */ - const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1; - - dw |= cso->stride << BRW_VB0_PITCH_SHIFT; - - ilo_cp_write(cp, dw); - ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_write(cp, instance_divisor); - } - else { - dw |= 1 << 13; - - ilo_cp_write(cp, dw); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, instance_divisor); - } - } - - ilo_cp_end(cp); -} - -static void -ve_set_cso_edgeflag(const struct ilo_dev_info *dev, - struct ilo_ve_cso *cso) -{ - int format; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. - * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." - */ - - cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE; - cso->payload[1] = - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT; - - /* - * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via - * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case. 
- */ - format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff; - if (format == BRW_SURFACEFORMAT_R32_FLOAT) { - STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT == - BRW_SURFACEFORMAT_R32_FLOAT - 1); - - cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT); - } - else { - assert(format == BRW_SURFACEFORMAT_R8_UINT); - } -} - -static void -ve_init_cso_with_components(const struct ilo_dev_info *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_GPE_VALID_GEN(dev, 6, 7); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = GEN6_VE0_VALID; - cso->payload[1] = - comp0 << BRW_VE1_COMPONENT_0_SHIFT | - comp1 << BRW_VE1_COMPONENT_1_SHIFT | - comp2 << BRW_VE1_COMPONENT_2_SHIFT | - comp3 << BRW_VE1_COMPONENT_3_SHIFT; -} - static void ve_init_cso(const struct ilo_dev_info *dev, const struct pipe_vertex_element *state, @@ -914,202 +281,29 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, for (i = 0; i < num_states; i++) { const unsigned pipe_idx = states[i].vertex_buffer_index; - const unsigned instance_divisor = states[i].instance_divisor; - unsigned hw_idx; - - /* - * map the pipe vb to the hardware vb, which has a fixed instance - * divisor - */ - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - if (ve->vb_mapping[hw_idx] == pipe_idx && - ve->instance_divisors[hw_idx] == instance_divisor) - break; - } - - /* create one if there is no matching hardware vb */ - if (hw_idx >= ve->vb_count) { - hw_idx = ve->vb_count++; - - ve->vb_mapping[hw_idx] = pipe_idx; - ve->instance_divisors[hw_idx] = instance_divisor; - } - - ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); - } -} - -static void -gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, - const struct ilo_ve_state *ve, - bool last_velement_edgeflag, - bool prepend_generated_ids, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09); - uint8_t cmd_len; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 2 
part 1, page 93: - * - * "Up to 34 (DevSNB+) vertex elements are supported." - */ - assert(ve->count + prepend_generated_ids <= 34); - - if (!ve->count && !prepend_generated_ids) { - struct ilo_ve_cso dummy; - - ve_init_cso_with_components(dev, - BRW_VE1_COMPONENT_STORE_0, - BRW_VE1_COMPONENT_STORE_0, - BRW_VE1_COMPONENT_STORE_0, - BRW_VE1_COMPONENT_STORE_1_FLT, - &dummy); - - cmd_len = 3; - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, dummy.payload, 2); - ilo_cp_end(cp); - - return; - } - - cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - - if (prepend_generated_ids) { - struct ilo_ve_cso gen_ids; - - ve_init_cso_with_components(dev, - BRW_VE1_COMPONENT_STORE_VID, - BRW_VE1_COMPONENT_STORE_IID, - BRW_VE1_COMPONENT_NOSTORE, - BRW_VE1_COMPONENT_NOSTORE, - &gen_ids); - - ilo_cp_write_multi(cp, gen_ids.payload, 2); - } - - if (last_velement_edgeflag) { - struct ilo_ve_cso edgeflag; - - for (i = 0; i < ve->count - 1; i++) - ilo_cp_write_multi(cp, ve->cso[i].payload, 2); - - edgeflag = ve->cso[i]; - ve_set_cso_edgeflag(dev, &edgeflag); - ilo_cp_write_multi(cp, edgeflag.payload, 2); - } - else { - for (i = 0; i < ve->count; i++) - ilo_cp_write_multi(cp, ve->cso[i].payload, 2); - } - - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_ib_state *ib, - bool enable_cut_index, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a); - const uint8_t cmd_len = 3; - struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); - uint32_t start_offset, end_offset; - int format; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - if (!buf) - return; - - format = gen6_translate_index_size(ib->hw_index_size); - - /* - * set start_offset to 0 here and adjust pipe_draw_info::start with - * ib->draw_start_offset in 3DPRIMITIVE - */ - start_offset = 0; - end_offset = buf->bo_size; - - /* end_offset 
must also be aligned and is inclusive */ - end_offset -= (end_offset % ib->hw_index_size); - end_offset--; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) | - format << 8); - ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t clip_viewport, - uint32_t sf_viewport, - uint32_t cc_viewport, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN6_CLIP_VIEWPORT_MODIFY | - GEN6_SF_VIEWPORT_MODIFY | - GEN6_CC_VIEWPORT_MODIFY); - ilo_cp_write(cp, clip_viewport); - ilo_cp_write(cp, sf_viewport); - ilo_cp_write(cp, cc_viewport); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t blend_state, - uint32_t depth_stencil_state, - uint32_t color_calc_state, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 6, 6); + const unsigned instance_divisor = states[i].instance_divisor; + unsigned hw_idx; - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, blend_state | 1); - ilo_cp_write(cp, depth_stencil_state | 1); - ilo_cp_write(cp, color_calc_state | 1); - ilo_cp_end(cp); -} + /* + * map the pipe vb to the hardware vb, which has a fixed instance + * divisor + */ + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + if (ve->vb_mapping[hw_idx] == pipe_idx && + ve->instance_divisors[hw_idx] == instance_divisor) + break; + } -static void -gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t scissor_rect, - struct ilo_cp *cp) -{ - const 
uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f); - const uint8_t cmd_len = 2; + /* create one if there is no matching hardware vb */ + if (hw_idx >= ve->vb_count) { + hw_idx = ve->vb_count++; - ILO_GPE_VALID_GEN(dev, 6, 7); + ve->vb_mapping[hw_idx] = pipe_idx; + ve->instance_divisors[hw_idx] = instance_divisor; + } - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, scissor_rect); - ilo_cp_end(cp); + ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); + } } void @@ -1189,48 +383,6 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, cso->payload[2] = dw5; } -static void -gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *vs, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10); - const uint8_t cmd_len = 6; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - if (!vs) { - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - return; - } - - cso = ilo_shader_get_kernel_cso(vs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_end(cp); -} - void ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, @@ -1333,75 +485,6 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, cso->payload[3] = dw6; } -static void -gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *gs, - const struct ilo_shader_state *vs, - int verts_per_prim, - struct ilo_cp *cp) -{ - const 
uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); - const uint8_t cmd_len = 7; - uint32_t dw1, dw2, dw4, dw5, dw6; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - if (gs) { - const struct ilo_shader_cso *cso; - - dw1 = ilo_shader_get_kernel_offset(gs); - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - dw6 = cso->payload[3]; - } - else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { - struct ilo_shader_cso cso; - enum ilo_kernel_param param; - - switch (verts_per_prim) { - case 1: - param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; - break; - case 2: - param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; - break; - default: - param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; - break; - } - - dw1 = ilo_shader_get_kernel_offset(vs) + - ilo_shader_get_kernel_param(vs, param); - - /* cannot use VS's CSO */ - ilo_gpe_init_gs_cso_gen6(dev, vs, &cso); - dw2 = cso.payload[0]; - dw4 = cso.payload[1]; - dw5 = cso.payload[2]; - dw6 = cso.payload[3]; - } - else { - dw1 = 0; - dw2 = 0; - dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT; - dw5 = GEN6_GS_STATISTICS_ENABLE; - dw6 = 0; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, dw6); - ilo_cp_end(cp); -} - void ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -1490,53 +573,6 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, clip->can_enable_guardband = false; } -static void -gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - bool enable_guardband, - int num_viewports, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12); - const uint8_t cmd_len = 4; - uint32_t dw1, dw2, dw3; - - if (rasterizer) { - int interps; - - dw1 = 
rasterizer->clip.payload[0]; - dw2 = rasterizer->clip.payload[1]; - dw3 = rasterizer->clip.payload[2]; - - if (enable_guardband && rasterizer->clip.can_enable_guardband) - dw2 |= GEN6_CLIP_GB_TEST; - - interps = (fs) ? ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; - - if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC | - 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC | - 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC)) - dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; - - dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX | - (num_viewports - 1); - } - else { - dw1 = 0; - dw2 = 0; - dw3 = 0; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, dw3); - ilo_cp_end(cp); -} - void ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -1741,172 +777,6 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, } } -/** - * Fill in DW2 to DW7 of 3DSTATE_SF. 
- */ -void -ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - int num_samples, - enum pipe_format depth_format, - uint32_t *payload, unsigned payload_len) -{ - const struct ilo_rasterizer_sf *sf = &rasterizer->sf; - - assert(payload_len == Elements(sf->payload)); - - if (sf) { - memcpy(payload, sf->payload, sizeof(sf->payload)); - - if (num_samples > 1) - payload[1] |= sf->dw_msaa; - - if (dev->gen >= ILO_GEN(7)) { - int format; - - /* separate stencil */ - switch (depth_format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - depth_format = PIPE_FORMAT_Z24X8_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - depth_format = PIPE_FORMAT_Z32_FLOAT;; - break; - case PIPE_FORMAT_S8_UINT: - depth_format = PIPE_FORMAT_NONE; - break; - default: - break; - } - - format = gen6_translate_depth_format(depth_format); - /* FLOAT surface is assumed when there is no depth buffer */ - if (format < 0) - format = BRW_DEPTHFORMAT_D32_FLOAT; - - payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT; - } - } - else { - payload[0] = 0; - payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0; - payload[2] = 0; - payload[3] = 0; - payload[4] = 0; - payload[5] = 0; - } -} - -/** - * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. 
- */ -void -ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - const struct ilo_shader_state *last_sh, - uint32_t *dw, int num_dwords) -{ - int output_count, vue_offset, vue_len; - const struct ilo_kernel_routing *routing; - - ILO_GPE_VALID_GEN(dev, 6, 7); - assert(num_dwords == 13); - - if (!fs) { - memset(dw, 0, sizeof(dw[0]) * num_dwords); - - if (dev->gen >= ILO_GEN(7)) - dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT; - else - dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT; - - return; - } - - output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - assert(output_count <= 32); - - routing = ilo_shader_get_kernel_routing(fs); - - vue_offset = routing->source_skip; - assert(vue_offset % 2 == 0); - vue_offset /= 2; - - vue_len = (routing->source_len + 1) / 2; - if (!vue_len) - vue_len = 1; - - if (dev->gen >= ILO_GEN(7)) { - dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT | - vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; - if (routing->swizzle_enable) - dw[0] |= GEN7_SBE_SWIZZLE_ENABLE; - } - else { - dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT | - vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; - if (routing->swizzle_enable) - dw[0] |= GEN6_SF_SWIZZLE_ENABLE; - } - - switch (rasterizer->state.sprite_coord_mode) { - case PIPE_SPRITE_COORD_UPPER_LEFT: - dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT; - break; - case PIPE_SPRITE_COORD_LOWER_LEFT: - dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT; - break; - } - - STATIC_ASSERT(Elements(routing->swizzles) >= 16); - memcpy(&dw[1], routing->swizzles, 2 * 16); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 268: - * - * "This field (Point Sprite Texture Coordinate Enable) must be - * programmed to 0 when non-point primitives are rendered." - * - * TODO We do not check that yet. 
- */ - dw[9] = routing->point_sprite_enable; - - dw[10] = routing->const_interp_enable; - - /* WrapShortest enables */ - dw[11] = 0; - dw[12] = 0; -} - -static void -gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - const struct ilo_shader_state *last_sh, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); - const uint8_t cmd_len = 20; - uint32_t payload_raster[6], payload_sbe[13]; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, - 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); - ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, - fs, last_sh, payload_sbe, Elements(payload_sbe)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, payload_sbe[0]); - ilo_cp_write_multi(cp, payload_raster, 6); - ilo_cp_write_multi(cp, &payload_sbe[1], 12); - ilo_cp_end(cp); -} - void ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -1975,364 +845,79 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT; - dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 | - 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 | - 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2; - - dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the - * PS kernel or color calculator has the ability to kill (discard) - * pixels or samples, other than due to depth or stencil testing. - * This bit is required to be ENABLED in the following situations: - * - * The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that can - * cause the final pixel mask to differ from the pixel mask received - * on dispatch. 
- * - * A sampler with chroma key enabled with kill pixel mode is used by - * the pixel shader. - * - * Any render target has Alpha Test Enable or AlphaToCoverage Enable - * enabled. - * - * The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware and - * therefore not via PS instructions, there should be no need to - * ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw5 |= GEN6_WM_KILL_ENABLE; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth - * field must be set to disabled." - * - * TODO This is not checked yet. - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw5 |= GEN6_WM_COMPUTED_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw5 |= GEN6_WM_USES_SOURCE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw5 |= GEN6_WM_USES_SOURCE_W; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - if (true) - dw5 |= GEN6_WM_DISPATCH_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw5 |= GEN6_WM_8_DISPATCH_ENABLE; - - dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT | - GEN6_WM_POSOFFSET_NONE | - interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; - - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = dw6; -} - -static void -gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev, - const struct ilo_shader_state *fs, - int num_samplers, - const struct ilo_rasterizer_state *rasterizer, - bool dual_blend, bool cc_may_kill, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); - const uint8_t cmd_len = 9; - const int num_samples = 1; - 
const struct ilo_shader_cso *fs_cso; - uint32_t dw2, dw4, dw5, dw6; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - if (!fs) { - /* see brwCreateContext() */ - const int max_threads = (dev->gt == 2) ? 80 : 40; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - /* honor the valid range even if dispatching is disabled */ - ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - - return; - } - - fs_cso = ilo_shader_get_kernel_cso(fs); - dw2 = fs_cso->payload[0]; - dw4 = fs_cso->payload[1]; - dw5 = fs_cso->payload[2]; - dw6 = fs_cso->payload[3]; - - dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT; - - if (true) { - dw4 |= GEN6_WM_STATISTICS_ENABLE; - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) must be disabled if either of these - * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer - * Resolve Enable or Depth Buffer Resolve Enable." 
- */ - dw4 |= GEN6_WM_DEPTH_CLEAR; - dw4 |= GEN6_WM_DEPTH_RESOLVE; - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; - } - - if (cc_may_kill) { - dw5 |= GEN6_WM_KILL_ENABLE | - GEN6_WM_DISPATCH_ENABLE; - } - - if (dual_blend) - dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE; - - dw5 |= rasterizer->wm.payload[0]; - - dw6 |= rasterizer->wm.payload[1]; - - if (num_samples > 1) { - dw6 |= rasterizer->wm.dw_msaa_rast | - rasterizer->wm.dw_msaa_disp; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, dw6); - ilo_cp_write(cp, 0); /* kernel 1 */ - ilo_cp_write(cp, 0); /* kernel 2 */ - ilo_cp_end(cp); -} - -static unsigned -gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, int max_read_length, - uint32_t *dw, int num_dwords) -{ - unsigned enabled = 0x0; - int total_read_length, i; - - assert(num_dwords == 4); - - total_read_length = 0; - for (i = 0; i < 4; i++) { - if (i < num_bufs && sizes[i]) { - /* in 256-bit units minus one */ - const int read_len = (sizes[i] + 31) / 32 - 1; - - assert(bufs[i] % 32 == 0); - assert(read_len < 32); - - enabled |= 1 << i; - dw[i] = bufs[i] | read_len; - - total_read_length += read_len + 1; - } - else { - dw[i] = 0; - } - } - - assert(total_read_length <= max_read_length); - - return enabled; -} - -static void -gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15); - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; + dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 | + 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 | + 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2; - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(num_bufs <= 4); + dw5 = 
(max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* - * From the Sandy Bridge PRM, volume 2 part 1, page 138: + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the + * PS kernel or color calculator has the ability to kill (discard) + * pixels or samples, other than due to depth or stencil testing. + * This bit is required to be ENABLED in the following situations: + * + * The API pixel shader program contains "killpix" or "discard" + * instructions, or other code in the pixel shader kernel that can + * cause the final pixel mask to differ from the pixel mask received + * on dispatch. + * + * A sampler with chroma key enabled with kill pixel mode is used by + * the pixel shader. + * + * Any render target has Alpha Test Enable or AlphaToCoverage Enable + * enabled. * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 32" + * The pixel shader kernel generates and outputs oMask. + * + * Note: As ClipDistance clipping is fully supported in hardware and + * therefore not via PS instructions, there should be no need to + * ENABLE this bit due to ClipDistance clipping." 
*/ - buf_enabled = gen6_fill_3dstate_constant(dev, - bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); - ilo_cp_write(cp, buf_dw[0]); - ilo_cp_write(cp, buf_dw[1]); - ilo_cp_write(cp, buf_dw[2]); - ilo_cp_write(cp, buf_dw[3]); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16); - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(num_bufs <= 4); + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) + dw5 |= GEN6_WM_KILL_ENABLE; /* - * From the Sandy Bridge PRM, volume 2 part 1, page 161: + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth + * field must be set to disabled." * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" + * TODO This is not checked yet. 
*/ - buf_enabled = gen6_fill_3dstate_constant(dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); - ilo_cp_write(cp, buf_dw[0]); - ilo_cp_write(cp, buf_dw[1]); - ilo_cp_write(cp, buf_dw[2]); - ilo_cp_write(cp, buf_dw[3]); - ilo_cp_end(cp); -} + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) + dw5 |= GEN6_WM_COMPUTED_DEPTH; -static void -gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17); - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) + dw5 |= GEN6_WM_USES_SOURCE_DEPTH; - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(num_bufs <= 4); + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) + dw5 |= GEN6_WM_USES_SOURCE_W; /* - * From the Sandy Bridge PRM, volume 2 part 1, page 287: + * TODO set this bit only when * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" + * a) fs writes colors and color is not masked, or + * b) fs writes depth, or + * c) fs or cc kills */ - buf_enabled = gen6_fill_3dstate_constant(dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); - ilo_cp_write(cp, buf_dw[0]); - ilo_cp_write(cp, buf_dw[1]); - ilo_cp_write(cp, buf_dw[2]); - ilo_cp_write(cp, buf_dw[3]); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, - unsigned sample_mask, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); - const uint8_t cmd_len = 2; - const unsigned valid_mask = 0xf; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - sample_mask &= valid_mask; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, 
cmd | (cmd_len - 2)); - ilo_cp_write(cp, sample_mask); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, - unsigned x, unsigned y, - unsigned width, unsigned height, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00); - const uint8_t cmd_len = 4; - unsigned xmax = x + width - 1; - unsigned ymax = y + height - 1; - int rect_limit; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - if (dev->gen >= ILO_GEN(7)) { - rect_limit = 16383; - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 230: - * - * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) - * must be an even number" - */ - assert(y % 2 == 0); - - rect_limit = 8191; - } - - if (x > rect_limit) x = rect_limit; - if (y > rect_limit) y = rect_limit; - if (xmax > rect_limit) xmax = rect_limit; - if (ymax > rect_limit) ymax = rect_limit; + if (true) + dw5 |= GEN6_WM_DISPATCH_ENABLE; - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, y << 16 | x); - ilo_cp_write(cp, ymax << 16 | xmax); + assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; - /* - * There is no need to set the origin. It is intended to support front - * buffer rendering. 
- */ - ilo_cp_write(cp, 0); + dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT | + GEN6_WM_POSOFFSET_NONE | + interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; - ilo_cp_end(cp); + STATIC_ASSERT(Elements(cso->payload) >= 4); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; + cso->payload[3] = dw6; } struct ilo_zs_surface_info { @@ -2712,441 +1297,39 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, zs->payload[4] = dw5; zs->payload[5] = dw6; - /* do not increment reference count */ - zs->bo = info.zs.bo; - - /* separate stencil */ - if (info.stencil.bo) { - assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 && - info.stencil.stride % 128 == 0); - - zs->payload[6] = info.stencil.stride - 1; - zs->payload[7] = info.stencil.offset; - - /* do not increment reference count */ - zs->separate_s8_bo = info.stencil.bo; - } - else { - zs->payload[6] = 0; - zs->payload[7] = 0; - zs->separate_s8_bo = NULL; - } - - /* hiz */ - if (info.hiz.bo) { - zs->payload[8] = info.hiz.stride - 1; - zs->payload[9] = info.hiz.offset; - - /* do not increment reference count */ - zs->hiz_bo = info.hiz.bo; - } - else { - zs->payload[8] = 0; - zs->payload[9] = 0; - zs->hiz_bo = NULL; - } -} - -static void -gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_zs_surface *zs, - struct ilo_cp *cp) -{ - const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? 
- ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05); - const uint8_t cmd_len = 7; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, zs->payload[0]); - ilo_cp_write_bo(cp, zs->payload[1], zs->bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_write(cp, zs->payload[2]); - ilo_cp_write(cp, zs->payload[3]); - ilo_cp_write(cp, zs->payload[4]); - ilo_cp_write(cp, zs->payload[5]); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev, - int x_offset, int y_offset, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 7); - assert(x_offset >= 0 && x_offset <= 31); - assert(y_offset >= 0 && y_offset <= 31); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, x_offset << 8 | y_offset); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev, - const struct pipe_poly_stipple *pattern, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07); - const uint8_t cmd_len = 33; - int i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - assert(Elements(pattern->stipple) == 32); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - for (i = 0; i < 32; i++) - ilo_cp_write(cp, pattern->stipple[i]); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev, - unsigned pattern, unsigned factor, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08); - const uint8_t cmd_len = 3; - unsigned inverse; - - ILO_GPE_VALID_GEN(dev, 6, 7); - assert((pattern & 0xffff) == pattern); - assert(factor >= 1 && factor <= 256); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, pattern); - - if (dev->gen >= ILO_GEN(7)) { - /* in U1.16 */ - inverse = (unsigned) (65536.0f / 
factor); - ilo_cp_write(cp, inverse << 15 | factor); - } - else { - /* in U1.13 */ - inverse = (unsigned) (8192.0f / factor); - ilo_cp_write(cp, inverse << 16 | factor); - } - - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0 << 16 | 0); - ilo_cp_write(cp, 0 << 16 | 0); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev, - int index, unsigned svbi, - unsigned max_svbi, - bool load_vertex_count, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b); - const uint8_t cmd_len = 4; - uint32_t dw1; - - ILO_GPE_VALID_GEN(dev, 6, 6); - assert(index >= 0 && index < 4); - - dw1 = index << SVB_INDEX_SHIFT; - if (load_vertex_count) - dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, svbi); - ilo_cp_write(cp, max_svbi); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev, - int num_samples, - const uint32_t *packed_sample_pos, - bool pixel_location_center, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d); - const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3; - uint32_t dw1, dw2, dw3; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - dw1 = (pixel_location_center) ? 
- MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT; - - switch (num_samples) { - case 0: - case 1: - dw1 |= MS_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - case 4: - dw1 |= MS_NUMSAMPLES_4; - dw2 = packed_sample_pos[0]; - dw3 = 0; - break; - case 8: - assert(dev->gen >= ILO_GEN(7)); - dw1 |= MS_NUMSAMPLES_8; - dw2 = packed_sample_pos[0]; - dw3 = packed_sample_pos[1]; - break; - default: - assert(!"unsupported sample count"); - dw1 |= MS_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - if (dev->gen >= ILO_GEN(7)) - ilo_cp_write(cp, dw3); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_zs_surface *zs, - struct ilo_cp *cp) -{ - const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? - ILO_GPE_CMD(0x3, 0x0, 0x06) : - ILO_GPE_CMD(0x3, 0x1, 0x0e); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - /* see ilo_gpe_init_zs_surface() */ - ilo_cp_write(cp, zs->payload[6]); - ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev, - const struct ilo_zs_surface *zs, - struct ilo_cp *cp) -{ - const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? 
- ILO_GPE_CMD(0x3, 0x0, 0x07) : - ILO_GPE_CMD(0x3, 0x1, 0x0f); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - /* see ilo_gpe_init_zs_surface() */ - ilo_cp_write(cp, zs->payload[8]); - ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, - uint32_t clear_val, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - GEN5_DEPTH_CLEAR_VALID); - ilo_cp_write(cp, clear_val); - ilo_cp_end(cp); -} - -static void -gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev, - uint32_t dw1, - struct intel_bo *bo, uint32_t bo_offset, - bool write_qword, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00); - const uint8_t cmd_len = (write_qword) ? 
5 : 4; - const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; - const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - if (dw1 & PIPE_CONTROL_CS_STALL) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 73: - * - * "1 of the following must also be set (when CS stall is set): - * - * * Depth Cache Flush Enable ([0] of DW1) - * * Stall at Pixel Scoreboard ([1] of DW1) - * * Depth Stall ([13] of DW1) - * * Post-Sync Operation ([13] of DW1) - * * Render Target Cache Flush Enable ([12] of DW1) - * * Notify Enable ([8] of DW1)" - * - * From the Ivy Bridge PRM, volume 2 part 1, page 61: - * - * "One of the following must also be set (when CS stall is set): - * - * * Render Target Cache Flush Enable ([12] of DW1) - * * Depth Cache Flush Enable ([0] of DW1) - * * Stall at Pixel Scoreboard ([1] of DW1) - * * Depth Stall ([13] of DW1) - * * Post-Sync Operation ([13] of DW1)" - */ - uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_DEPTH_STALL; - - /* post-sync op */ - bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP; - - if (dev->gen == ILO_GEN(6)) - bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE; - - assert(dw1 & bit_test); - } - - if (dw1 & PIPE_CONTROL_DEPTH_STALL) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 73: - * - * "Following bits must be clear (when Depth Stall is set): - * - * * Render Target Cache Flush Enable ([12] of DW1) - * * Depth Cache Flush Enable ([0] of DW1)" - */ - assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH))); - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); - ilo_cp_write(cp, 0); - if (write_qword) - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static void -gen6_emit_3DPRIMITIVE(const struct 
ilo_dev_info *dev, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); - const uint8_t cmd_len = 6; - const int prim = (rectlist) ? - _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : - GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); - - ILO_GPE_VALID_GEN(dev, 6, 6); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2) | - prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | - vb_access); - ilo_cp_write(cp, info->count); - ilo_cp_write(cp, vb_start); - ilo_cp_write(cp, info->instance_count); - ilo_cp_write(cp, info->start_instance); - ilo_cp_write(cp, info->index_bias); - ilo_cp_end(cp); -} - -static uint32_t -gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev, - const struct ilo_shader_state **cs, - uint32_t *sampler_state, - int *num_samplers, - uint32_t *binding_table_state, - int *num_surfaces, - int num_ids, - struct ilo_cp *cp) -{ - /* - * From the Sandy Bridge PRM, volume 2 part 2, page 34: - * - * "(Interface Descriptor Total Length) This field must have the same - * alignment as the Interface Descriptor Data Start Address. - * - * It must be DQWord (32-byte) aligned..." - * - * From the Sandy Bridge PRM, volume 2 part 2, page 35: - * - * "(Interface Descriptor Data Start Address) Specifies the 32-byte - * aligned address of the Interface Descriptor data." 
- */ - const int state_align = 32 / 4; - const int state_len = (32 / 4) * num_ids; - uint32_t state_offset, *dw; - int i; + /* do not increment reference count */ + zs->bo = info.zs.bo; - ILO_GPE_VALID_GEN(dev, 6, 6); + /* separate stencil */ + if (info.stencil.bo) { + assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 && + info.stencil.stride % 128 == 0); + + zs->payload[6] = info.stencil.stride - 1; + zs->payload[7] = info.stencil.offset; - dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA", - state_len, state_align, &state_offset); - - for (i = 0; i < num_ids; i++) { - dw[0] = ilo_shader_get_kernel_offset(cs[i]); - dw[1] = 1 << 18; /* SPF */ - dw[2] = sampler_state[i] | - (num_samplers[i] + 3) / 4 << 2; - dw[3] = binding_table_state[i] | - num_surfaces[i]; - dw[4] = 0 << 16 | /* CURBE Read Length */ - 0; /* CURBE Read Offset */ - dw[5] = 0; /* Barrier ID */ - dw[6] = 0; - dw[7] = 0; - - dw += 8; + /* do not increment reference count */ + zs->separate_s8_bo = info.stencil.bo; + } + else { + zs->payload[6] = 0; + zs->payload[7] = 0; + zs->separate_s8_bo = NULL; } - return state_offset; + /* hiz */ + if (info.hiz.bo) { + zs->payload[8] = info.hiz.stride - 1; + zs->payload[9] = info.hiz.offset; + + /* do not increment reference count */ + zs->hiz_bo = info.hiz.bo; + } + else { + zs->payload[8] = 0; + zs->payload[9] = 0; + zs->hiz_bo = NULL; + } } static void @@ -3248,149 +1431,6 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, vp->max_z = scale_z * 1.0f + state->translate[2]; } -static uint32_t -gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 8 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 262: - * - * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is - * 
stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - - dw += 8; - } - - return state_offset; -} - -static uint32_t -gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 4 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 193: - * - * "The viewport-related state is stored as an array of up to 16 - * elements..." - */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_gbx); - dw[1] = fui(vp->max_gbx); - dw[2] = fui(vp->min_gby); - dw[3] = fui(vp->max_gby); - - dw += 4; - } - - return state_offset; -} - -static uint32_t -gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 385: - * - * "The viewport state is stored as an array of up to 16 elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_z); - dw[1] = fui(vp->max_z); - - dw += 2; - } - - return state_offset; -} - -static uint32_t -gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev, - const struct pipe_stencil_ref *stencil_ref, - float alpha_ref, - const struct pipe_blend_color *blend_color, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - const int state_len = 6; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE", - state_len, state_align, &state_offset); - - dw[0] = stencil_ref->ref_value[0] << 24 | - stencil_ref->ref_value[1] << 16 | - BRW_ALPHATEST_FORMAT_UNORM8; - dw[1] = float_to_ubyte(alpha_ref); - dw[2] = fui(blend_color->color[0]); - dw[3] = fui(blend_color->color[1]); - dw[4] = fui(blend_color->color[2]); - dw[5] = fui(blend_color->color[3]); - - return state_offset; -} - static int gen6_blend_factor_dst_alpha_forced_one(int factor) { @@ -3537,134 +1577,6 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev, } } -static uint32_t -gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct pipe_alpha_state *alpha, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - int state_len; - uint32_t state_offset, *dw; - unsigned num_targets, i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 376: - * - * "The blend state is stored as an array of up to 8 elements..." 
- */ - num_targets = fb->state.nr_cbufs; - assert(num_targets <= 8); - - if (!num_targets) { - if (!alpha->enabled) - return 0; - /* to be able to reference alpha func */ - num_targets = 1; - } - - state_len = 2 * num_targets; - - dw = ilo_cp_steal_ptr(cp, "BLEND_STATE", - state_len, state_align, &state_offset); - - for (i = 0; i < num_targets; i++) { - const unsigned idx = (blend->independent_blend_enable) ? i : 0; - const struct ilo_blend_cso *cso = &blend->cso[idx]; - const int num_samples = fb->num_samples; - const struct util_format_description *format_desc = - (idx < fb->state.nr_cbufs) ? - util_format_description(fb->state.cbufs[idx]->format) : NULL; - bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; - - rt_is_unorm = true; - rt_is_pure_integer = false; - rt_dst_alpha_forced_one = false; - - if (format_desc) { - int ch; - - switch (format_desc->format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - /* force alpha to one when the HW format has alpha */ - assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM) - == BRW_SURFACEFORMAT_B8G8R8A8_UNORM); - rt_dst_alpha_forced_one = true; - break; - default: - break; - } - - for (ch = 0; ch < 4; ch++) { - if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) - continue; - - if (format_desc->channel[ch].pure_integer) { - rt_is_unorm = false; - rt_is_pure_integer = true; - break; - } - - if (!format_desc->channel[ch].normalized || - format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) - rt_is_unorm = false; - } - } - - dw[0] = cso->payload[0]; - dw[1] = cso->payload[1]; - - if (!rt_is_pure_integer) { - if (rt_dst_alpha_forced_one) - dw[0] |= cso->dw_blend_dst_alpha_forced_one; - else - dw[0] |= cso->dw_blend; - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Logic Ops are only supported on *_UNORM surfaces (excluding - * _SRGB variants), otherwise Logic Ops must be DISABLED." 
- * - * Since logicop is ignored for non-UNORM color buffers, no special care - * is needed. - */ - if (rt_is_unorm) - dw[1] |= cso->dw_logicop; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 356: - * - * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage - * Dither both must be disabled." - * - * There is no such limitation on GEN7, or for AlphaToOne. But GL - * requires that anyway. - */ - if (num_samples > 1) - dw[1] |= cso->dw_alpha_mod; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 382: - * - * "Alpha Test can only be enabled if Pixel Shader outputs a float - * alpha value." - */ - if (alpha->enabled && !rt_is_pure_integer) { - dw[1] |= 1 << 16 | - gen6_translate_dsa_func(alpha->func) << 13; - } - - dw += 2; - } - - return state_offset; -} - void ilo_gpe_init_dsa(const struct ilo_dev_info *dev, const struct pipe_depth_stencil_alpha_state *state, @@ -3748,28 +1660,6 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev, dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27; } -static uint32_t -gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev, - const struct ilo_dsa_state *dsa, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - const int state_len = 3; - uint32_t state_offset, *dw; - - - ILO_GPE_VALID_GEN(dev, 6, 7); - - dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE", - state_len, state_align, &state_offset); - - dw[0] = dsa->payload[0]; - dw[1] = dsa->payload[1]; - dw[2] = dsa->payload[2]; - - return state_offset; -} - void ilo_gpe_set_scissor(const struct ilo_dev_info *dev, unsigned start_slot, @@ -3820,64 +1710,6 @@ ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev, } } -static uint32_t -gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev, - const struct ilo_scissor_state *scissor, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, 
volume 2 part 1, page 263: - * - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT", - state_len, state_align, &state_offset); - - memcpy(dw, scissor->payload, state_len * 4); - - return state_offset; -} - -static uint32_t -gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev, - uint32_t *surface_states, - int num_surface_states, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = num_surface_states; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 69: - * - * "It is stored as an array of up to 256 elements..." - */ - assert(num_surface_states <= 256); - - if (!num_surface_states) - return 0; - - dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE", - state_len, state_align, &state_offset); - memcpy(dw, surface_states, - num_surface_states * sizeof(surface_states[0])); - - return state_offset; -} - void ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev, unsigned width, unsigned height, @@ -4236,92 +2068,6 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, surf->bo = tex->bo; } -static uint32_t -gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev, - const struct ilo_view_surface *surf, - bool for_render, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = (dev->gen >= ILO_GEN(7)) ? 
8 : 6; - uint32_t state_offset; - uint32_t read_domains, write_domain; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - if (for_render) { - read_domains = INTEL_DOMAIN_RENDER; - write_domain = INTEL_DOMAIN_RENDER; - } - else { - read_domains = INTEL_DOMAIN_SAMPLER; - write_domain = 0; - } - - ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset); - - STATIC_ASSERT(Elements(surf->payload) >= 8); - - ilo_cp_write(cp, surf->payload[0]); - ilo_cp_write_bo(cp, surf->payload[1], - surf->bo, read_domains, write_domain); - ilo_cp_write(cp, surf->payload[2]); - ilo_cp_write(cp, surf->payload[3]); - ilo_cp_write(cp, surf->payload[4]); - ilo_cp_write(cp, surf->payload[5]); - - if (dev->gen >= ILO_GEN(7)) { - ilo_cp_write(cp, surf->payload[6]); - ilo_cp_write(cp, surf->payload[7]); - } - - ilo_cp_end(cp); - - return state_offset; -} - -static uint32_t -gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev, - const struct pipe_stream_output_target *so, - const struct pipe_stream_output_info *so_info, - int so_index, - struct ilo_cp *cp) -{ - struct ilo_buffer *buf = ilo_buffer(so->buffer); - unsigned bo_offset, struct_size; - enum pipe_format elem_format; - struct ilo_view_surface surf; - - ILO_GPE_VALID_GEN(dev, 6, 6); - - bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - - switch (so_info->output[so_index].num_components) { - case 1: - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - case 2: - elem_format = PIPE_FORMAT_R32G32_FLOAT; - break; - case 3: - elem_format = PIPE_FORMAT_R32G32B32_FLOAT; - break; - case 4: - elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - break; - default: - assert(!"unexpected SO components length"); - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - } - - ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size, - struct_size, elem_format, false, true, &surf); - - return gen6_emit_SURFACE_STATE(dev, &surf, false, 
cp); -} - static void sampler_init_border_color_gen6(const struct ilo_dev_info *dev, const union pipe_color_union *color, @@ -4680,165 +2426,6 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, } } -static uint32_t -gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, - const struct ilo_sampler_cso * const *samplers, - const struct pipe_sampler_view * const *views, - const uint32_t *sampler_border_colors, - int num_samplers, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = 4 * num_samplers; - uint32_t state_offset, *dw; - int i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 101: - * - * "The sampler state is stored as an array of up to 16 elements..." - */ - assert(num_samplers <= 16); - - if (!num_samplers) - return 0; - - dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE", - state_len, state_align, &state_offset); - - for (i = 0; i < num_samplers; i++) { - const struct ilo_sampler_cso *sampler = samplers[i]; - const struct pipe_sampler_view *view = views[i]; - const uint32_t border_color = sampler_border_colors[i]; - uint32_t dw_filter, dw_wrap; - - /* there may be holes */ - if (!sampler || !view) { - /* disabled sampler */ - dw[0] = 1 << 31; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw += 4; - - continue; - } - - /* determine filter and wrap modes */ - switch (view->texture->target) { - case PIPE_TEXTURE_1D: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_1d; - break; - case PIPE_TEXTURE_3D: - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 103: - * - * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for - * surfaces of type SURFTYPE_3D." - */ - dw_filter = sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - case PIPE_TEXTURE_CUBE: - dw_filter = (sampler->anisotropic) ? 
- sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_cube; - break; - default: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - } - - dw[0] = sampler->payload[0]; - dw[1] = sampler->payload[1]; - assert(!(border_color & 0x1f)); - dw[2] = border_color; - dw[3] = sampler->payload[2]; - - dw[0] |= dw_filter; - - if (dev->gen >= ILO_GEN(7)) { - dw[3] |= dw_wrap; - } - else { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 21: - * - * "[DevSNB] Errata: Incorrect behavior is observed in cases - * where the min and mag mode filters are different and - * SurfMinLOD is nonzero. The determination of MagMode uses the - * following equation instead of the one in the above - * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" - * - * As a way to work around that, we set Base to - * view->u.tex.first_level. - */ - dw[0] |= view->u.tex.first_level << 22; - - dw[1] |= dw_wrap; - } - - dw += 4; - } - - return state_offset; -} - -static uint32_t -gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, - const struct ilo_sampler_cso *sampler, - struct ilo_cp *cp) -{ - const int state_align = 32 / 4; - const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12; - uint32_t state_offset, *dw; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", - state_len, state_align, &state_offset); - - /* see ilo_gpe_init_sampler_cso() */ - memcpy(dw, &sampler->payload[3], state_len * 4); - - return state_offset; -} - -static uint32_t -gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev, - int size, void **pcb, - struct ilo_cp *cp) -{ - /* - * For all VS, GS, FS, and CS push constant buffers, they must be aligned - * to 32 bytes, and their sizes are specified in 256-bit units. 
- */ - const int state_align = 32 / 4; - const int state_len = align(size, 32) / 4; - uint32_t state_offset; - char *buf; - - ILO_GPE_VALID_GEN(dev, 6, 7); - - buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER", - state_len, state_align, &state_offset); - - /* zero out the unused range */ - if (size < state_len * 4) - memset(&buf[size], 0, state_len * 4 - size); - - if (pcb) - *pcb = buf; - - return state_offset; -} - static int gen6_estimate_command_size(const struct ilo_dev_info *dev, enum ilo_gpe_gen6_command cmd, diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index 300176428c1..7d4bbb59dfe 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -28,7 +28,16 @@ #ifndef ILO_GPE_GEN6_H #define ILO_GPE_GEN6_H +#include "util/u_dual_blend.h" +#include "util/u_half.h" +#include "brw_defines.h" +#include "intel_reg.h" + #include "ilo_common.h" +#include "ilo_cp.h" +#include "ilo_format.h" +#include "ilo_resource.h" +#include "ilo_shader.h" #include "ilo_gpe.h" #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \ @@ -533,27 +542,2417 @@ ilo_gpe_gen6_get(void); /* Below are helpers for other GENs */ -int -ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling); +/** + * Translate a depth/stencil pipe format to the matching hardware + * format. Return -1 on errors. + */ +static inline int +gen6_translate_depth_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return BRW_DEPTHFORMAT_D16_UNORM; + case PIPE_FORMAT_Z32_FLOAT: + return BRW_DEPTHFORMAT_D32_FLOAT; + case PIPE_FORMAT_Z24X8_UNORM: + return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT; + default: + return -1; + } +} + +/** + * Translate winsys tiling to hardware tiling. 
+ */ +static inline int +ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling) +{ + switch (tiling) { + case INTEL_TILING_NONE: + return 0; + case INTEL_TILING_X: + return BRW_SURFACE_TILED; + case INTEL_TILING_Y: + return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; + default: + assert(!"unknown tiling"); + return 0; + } +} -int -ilo_gpe_gen6_translate_pipe_prim(unsigned prim); +/** + * Translate a pipe primitive type to the matching hardware primitive type. + */ +static inline int +ilo_gpe_gen6_translate_pipe_prim(unsigned prim) +{ + static const int prim_mapping[PIPE_PRIM_MAX] = { + [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, + [PIPE_PRIM_LINES] = _3DPRIM_LINELIST, + [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, + [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST, + [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, + [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, + [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, + }; + + assert(prim < PIPE_PRIM_MAX && prim_mapping[prim]); /* bound the index before reading the table */ + + return prim_mapping[prim]; +} -int -ilo_gpe_gen6_translate_texture(enum pipe_texture_target target); +/** + * Translate a pipe texture target to the matching hardware surface type. 
+ */ +static inline int +ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_BUFFER: + return BRW_SURFACE_BUFFER; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return BRW_SURFACE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + return BRW_SURFACE_2D; + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return BRW_SURFACE_CUBE; + default: + assert(!"unknown texture target"); + return BRW_SURFACE_BUFFER; + } +} -void +/** + * Fill in DW2 to DW7 of 3DSTATE_SF. A NULL rasterizer selects the defaults in the else branch. + */ +static inline void ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, const struct ilo_rasterizer_state *rasterizer, int num_samples, enum pipe_format depth_format, - uint32_t *payload, unsigned payload_len); + uint32_t *payload, unsigned payload_len) +{ + const struct ilo_rasterizer_sf *sf = (rasterizer) ? &rasterizer->sf : NULL; /* keep "if (sf)" meaningful */ + + assert(payload_len == Elements(sf->payload)); + + if (sf) { + memcpy(payload, sf->payload, sizeof(sf->payload)); + + if (num_samples > 1) + payload[1] |= sf->dw_msaa; + + if (dev->gen >= ILO_GEN(7)) { + int format; + + /* separate stencil */ + switch (depth_format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + depth_format = PIPE_FORMAT_Z24X8_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + depth_format = PIPE_FORMAT_Z32_FLOAT; + break; + case PIPE_FORMAT_S8_UINT: + depth_format = PIPE_FORMAT_NONE; + break; + default: + break; + } + + format = gen6_translate_depth_format(depth_format); + /* FLOAT surface is assumed when there is no depth buffer */ + if (format < 0) + format = BRW_DEPTHFORMAT_D32_FLOAT; + + payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT; + } + } + else { + payload[0] = 0; + payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0; + payload[2] = 0; + payload[3] = 0; + payload[4] = 0; + payload[5] = 0; + } +} -void +/** + * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. 
+ */ +static inline void ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, const struct ilo_rasterizer_state *rasterizer, const struct ilo_shader_state *fs, const struct ilo_shader_state *last_sh, - uint32_t *dw, int num_dwords); + uint32_t *dw, int num_dwords) +{ + int output_count, vue_offset, vue_len; + const struct ilo_kernel_routing *routing; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert(num_dwords == 13); + + if (!fs) { + memset(dw, 0, sizeof(dw[0]) * num_dwords); + + if (dev->gen >= ILO_GEN(7)) + dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT; + else + dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT; + + return; + } + + output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + assert(output_count <= 32); + + routing = ilo_shader_get_kernel_routing(fs); + + vue_offset = routing->source_skip; + assert(vue_offset % 2 == 0); + vue_offset /= 2; + + vue_len = (routing->source_len + 1) / 2; + if (!vue_len) + vue_len = 1; + + if (dev->gen >= ILO_GEN(7)) { + dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT | + vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; + if (routing->swizzle_enable) + dw[0] |= GEN7_SBE_SWIZZLE_ENABLE; + } + else { + dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT | + vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + if (routing->swizzle_enable) + dw[0] |= GEN6_SF_SWIZZLE_ENABLE; + } + + switch (rasterizer->state.sprite_coord_mode) { + case PIPE_SPRITE_COORD_UPPER_LEFT: + dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT; + break; + case PIPE_SPRITE_COORD_LOWER_LEFT: + dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + break; + } + + STATIC_ASSERT(Elements(routing->swizzles) >= 16); + memcpy(&dw[1], routing->swizzles, 2 * 16); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 268: + * + * "This field (Point Sprite Texture Coordinate Enable) must be + * programmed to 0 when non-point primitives are rendered." 
+ * + * TODO We do not check that yet. + */ + dw[9] = routing->point_sprite_enable; + + dw[10] = routing->const_interp_enable; + + /* WrapShortest enables */ + dw[11] = 0; + dw[12] = 0; +} + +static inline void +gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev, + struct intel_bo *general_state_bo, + struct intel_bo *surface_state_bo, + struct intel_bo *dynamic_state_bo, + struct intel_bo *indirect_object_bo, + struct intel_bo *instruction_bo, + uint32_t general_state_size, + uint32_t dynamic_state_size, + uint32_t indirect_object_size, + uint32_t instruction_size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01); + const uint8_t cmd_len = 10; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* 4K-page aligned */ + assert(((general_state_size | dynamic_state_size | + indirect_object_size | instruction_size) & 0xfff) == 0); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + + ilo_cp_write_bo(cp, 1, general_state_bo, + INTEL_DOMAIN_RENDER, + 0); + ilo_cp_write_bo(cp, 1, surface_state_bo, + INTEL_DOMAIN_SAMPLER, + 0); + ilo_cp_write_bo(cp, 1, dynamic_state_bo, + INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, + 0); + ilo_cp_write_bo(cp, 1, indirect_object_bo, + 0, + 0); + ilo_cp_write_bo(cp, 1, instruction_bo, + INTEL_DOMAIN_INSTRUCTION, + 0); + + if (general_state_size) { + ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo, + INTEL_DOMAIN_RENDER, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 1); + } + + if (dynamic_state_size) { + ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo, + INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 0xfffff000 + 1); + } + + if (indirect_object_size) { + ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo, + 0, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 0xfffff000 + 1); + } + + if (instruction_size) { + ilo_cp_write_bo(cp, instruction_size | 1, 
instruction_bo, + INTEL_DOMAIN_INSTRUCTION, + 0); + } + else { + /* skip range check */ + ilo_cp_write(cp, 1); + } + + ilo_cp_end(cp); +} + +static inline void +gen6_emit_STATE_SIP(const struct ilo_dev_info *dev, + uint32_t sip, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); /* header dword carries cmd_len - 2, as in the sibling emitters */ + ilo_cp_write(cp, sip); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev, + bool enable, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b); + const uint8_t cmd_len = 1; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | enable); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev, + int pipeline, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04); + const uint8_t cmd_len = 1; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* 3D or media */ + assert(pipeline == 0x0 || pipeline == 0x1); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | pipeline); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev, + int max_threads, int num_urb_entries, + int urb_entry_size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00); + const uint8_t cmd_len = 8; + uint32_t dw2, dw4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw2 = (max_threads - 1) << 16 | + num_urb_entries << 8 | + 1 << 7 | /* Reset Gateway Timer */ + 1 << 6; /* Bypass Gateway Control */ + + dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */ + 480; /* CURBE Allocation Size */ + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* MBZ */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, 0); /* scoreboard */ + ilo_cp_write(cp, 0); + 
ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev, + uint32_t buf, int size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + assert(buf % 32 == 0); + /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */ + size = align(size, 32); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); /* MBZ */ + ilo_cp_write(cp, size); + ilo_cp_write(cp, buf); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev, + uint32_t offset, int num_ids, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + assert(offset % 32 == 0); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); /* MBZ */ + /* every ID has 8 DWords */ + ilo_cp_write(cp, num_ids * 8 * 4); + ilo_cp_write(cp, offset); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev, + int id, int byte, int thread_count, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03); + const uint8_t cmd_len = 2; + uint32_t dw1; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw1 = id << 16 | + byte << 8 | + thread_count; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev, + int thread_count_water_mark, + int barrier_mask, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04); + const uint8_t cmd_len = 2; + uint32_t dw1; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw1 = thread_count_water_mark << 16 | + barrier_mask; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 
dw1); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + assert(!"MEDIA_OBJECT_WALKER unsupported"); +} + +static inline void +gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev, + uint32_t vs_binding_table, + uint32_t gs_binding_table, + uint32_t ps_binding_table, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + GEN6_BINDING_TABLE_MODIFY_VS | + GEN6_BINDING_TABLE_MODIFY_GS | + GEN6_BINDING_TABLE_MODIFY_PS); + ilo_cp_write(cp, vs_binding_table); + ilo_cp_write(cp, gs_binding_table); + ilo_cp_write(cp, ps_binding_table); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t vs_sampler_state, + uint32_t gs_sampler_state, + uint32_t ps_sampler_state, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + VS_SAMPLER_STATE_CHANGE | + GS_SAMPLER_STATE_CHANGE | + PS_SAMPLER_STATE_CHANGE); + ilo_cp_write(cp, vs_sampler_state); + ilo_cp_write(cp, gs_sampler_state); + ilo_cp_write(cp, ps_sampler_state); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev, + int vs_total_size, int gs_total_size, + int vs_entry_size, int gs_entry_size, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05); + const uint8_t cmd_len = 3; + const int row_size = 128; /* 1024 bits */ + int vs_alloc_size, gs_alloc_size; + int vs_num_entries, gs_num_entries; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + /* in 1024-bit URB rows */ + vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; + gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; + + /* 
the valid range is [1, 5] */ + if (!vs_alloc_size) + vs_alloc_size = 1; + if (!gs_alloc_size) + gs_alloc_size = 1; + assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); + + /* the valid range is [24, 256] in multiples of 4 */ + vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; + if (vs_num_entries > 256) + vs_num_entries = 256; + assert(vs_num_entries >= 24); + + /* the valid range is [0, 256] in multiples of 4 */ + gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; + if (gs_num_entries > 256) + gs_num_entries = 256; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT | + vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT); + ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT | + (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, + const struct ilo_ve_state *ve, + const struct ilo_vb_state *vb, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08); + uint8_t cmd_len; + unsigned hw_idx; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 82: + * + * "From 1 to 33 VBs can be specified..." 
+ */ + assert(ve->vb_count <= 33); + + if (!ve->vb_count) + return; + + cmd_len = 1 + 4 * ve->vb_count; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned instance_divisor = ve->instance_divisors[hw_idx]; + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; + uint32_t dw; + + dw = hw_idx << GEN6_VB0_INDEX_SHIFT; + + if (instance_divisor) + dw |= GEN6_VB0_ACCESS_INSTANCEDATA; + else + dw |= GEN6_VB0_ACCESS_VERTEXDATA; + + if (dev->gen >= ILO_GEN(7)) + dw |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + /* use null vb if there is no buffer or the stride is out of range */ + if (cso->buffer && cso->stride <= 2048) { + const struct ilo_buffer *buf = ilo_buffer(cso->buffer); + const uint32_t start_offset = cso->buffer_offset; + /* + * As noted in ilo_translate_format(), we treat some 3-component + * formats as 4-component formats to work around hardware + * limitations. Imagine the case where the vertex buffer holds a + * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. + * The hardware would not be able to fetch it because the vertex + * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex + * and that takes at least 8 bytes. + * + * For the workaround to work, we query the physical size, which is + * page aligned, to calculate end_offset so that the last vertex has + * a better chance to be fetched. 
+ */ + const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1; + + dw |= cso->stride << BRW_VB0_PITCH_SHIFT; + + ilo_cp_write(cp, dw); + ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_write(cp, instance_divisor); + } + else { + dw |= 1 << 13; + + ilo_cp_write(cp, dw); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, instance_divisor); + } + } + + ilo_cp_end(cp); +} + +static inline void +ve_init_cso_with_components(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) +{ + ILO_GPE_VALID_GEN(dev, 6, 7); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = GEN6_VE0_VALID; + cso->payload[1] = + comp0 << BRW_VE1_COMPONENT_0_SHIFT | + comp1 << BRW_VE1_COMPONENT_1_SHIFT | + comp2 << BRW_VE1_COMPONENT_2_SHIFT | + comp3 << BRW_VE1_COMPONENT_3_SHIFT; +} + +static inline void +ve_set_cso_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso) +{ + int format; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. + * + * - The Source Element Format must be set to the UINT format. + * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." 
+ */ + + cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE; + cso->payload[1] = + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT; + + /* + * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via + * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case. + */ + format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff; + if (format == BRW_SURFACEFORMAT_R32_FLOAT) { + STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT == + BRW_SURFACEFORMAT_R32_FLOAT - 1); + + cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT); + } + else { + assert(format == BRW_SURFACEFORMAT_R8_UINT); + } +} + +static inline void +gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, + const struct ilo_ve_state *ve, + bool last_velement_edgeflag, + bool prepend_generated_ids, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09); + uint8_t cmd_len; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 93: + * + * "Up to 34 (DevSNB+) vertex elements are supported." 
+ */ + assert(ve->count + prepend_generated_ids <= 34); + + if (!ve->count && !prepend_generated_ids) { + struct ilo_ve_cso dummy; + + ve_init_cso_with_components(dev, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_1_FLT, + &dummy); + + cmd_len = 3; + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write_multi(cp, dummy.payload, 2); + ilo_cp_end(cp); + + return; + } + + cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + + if (prepend_generated_ids) { + struct ilo_ve_cso gen_ids; + + ve_init_cso_with_components(dev, + BRW_VE1_COMPONENT_STORE_VID, + BRW_VE1_COMPONENT_STORE_IID, + BRW_VE1_COMPONENT_NOSTORE, + BRW_VE1_COMPONENT_NOSTORE, + &gen_ids); + + ilo_cp_write_multi(cp, gen_ids.payload, 2); + } + + if (last_velement_edgeflag) { + struct ilo_ve_cso edgeflag; + + for (i = 0; i < ve->count - 1; i++) + ilo_cp_write_multi(cp, ve->cso[i].payload, 2); + + edgeflag = ve->cso[i]; + ve_set_cso_edgeflag(dev, &edgeflag); + ilo_cp_write_multi(cp, edgeflag.payload, 2); + } + else { + for (i = 0; i < ve->count; i++) + ilo_cp_write_multi(cp, ve->cso[i].payload, 2); + } + + ilo_cp_end(cp); +} + +/** + * Translate an index size to the matching hardware index format. 
+ */ +static inline int +gen6_translate_index_size(int size) +{ + switch (size) { + case 4: return BRW_INDEX_DWORD; + case 2: return BRW_INDEX_WORD; + case 1: return BRW_INDEX_BYTE; + default: + assert(!"unknown index size"); + return BRW_INDEX_BYTE; + } +} + +static inline void +gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_ib_state *ib, + bool enable_cut_index, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a); + const uint8_t cmd_len = 3; + struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); + uint32_t start_offset, end_offset; + int format; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (!buf) + return; + + format = gen6_translate_index_size(ib->hw_index_size); + + /* + * set start_offset to 0 here and adjust pipe_draw_info::start with + * ib->draw_start_offset in 3DPRIMITIVE + */ + start_offset = 0; + end_offset = buf->bo_size; + + /* end_offset must also be aligned and is inclusive */ + end_offset -= (end_offset % ib->hw_index_size); + end_offset--; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + ((enable_cut_index) ? 
BRW_CUT_INDEX_ENABLE : 0) | + format << 8); + ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t clip_viewport, + uint32_t sf_viewport, + uint32_t cc_viewport, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + GEN6_CLIP_VIEWPORT_MODIFY | + GEN6_SF_VIEWPORT_MODIFY | + GEN6_CC_VIEWPORT_MODIFY); + ilo_cp_write(cp, clip_viewport); + ilo_cp_write(cp, sf_viewport); + ilo_cp_write(cp, cc_viewport); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t blend_state, + uint32_t depth_stencil_state, + uint32_t color_calc_state, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e); + const uint8_t cmd_len = 4; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, blend_state | 1); + ilo_cp_write(cp, depth_stencil_state | 1); + ilo_cp_write(cp, color_calc_state | 1); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev, + uint32_t scissor_rect, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, scissor_rect); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *vs, + int num_samplers, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10); + const uint8_t cmd_len = 6; + const struct ilo_shader_cso *cso; + uint32_t 
dw2, dw4, dw5; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (!vs) { + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + return; + } + + cso = ilo_shader_get_kernel_cso(vs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs)); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); + const uint8_t cmd_len = 7; + uint32_t dw1, dw2, dw4, dw5, dw6; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + if (gs) { + const struct ilo_shader_cso *cso; + + dw1 = ilo_shader_get_kernel_offset(gs); + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + dw6 = cso->payload[3]; + } + else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { + struct ilo_shader_cso cso; + enum ilo_kernel_param param; + + switch (verts_per_prim) { + case 1: + param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; + break; + case 2: + param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; + break; + default: + param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; + break; + } + + dw1 = ilo_shader_get_kernel_offset(vs) + + ilo_shader_get_kernel_param(vs, param); + + /* cannot use VS's CSO */ + ilo_gpe_init_gs_cso_gen6(dev, vs, &cso); + dw2 = cso.payload[0]; + dw4 = cso.payload[1]; + dw5 = cso.payload[2]; + dw6 = cso.payload[3]; + } + else { + dw1 = 0; + dw2 = 0; + dw4 = 1 << 
GEN6_GS_URB_READ_LENGTH_SHIFT; + dw5 = GEN6_GS_STATISTICS_ENABLE; + dw6 = 0; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_write(cp, dw6); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + bool enable_guardband, + int num_viewports, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12); + const uint8_t cmd_len = 4; + uint32_t dw1, dw2, dw3; + + if (rasterizer) { + int interps; + + dw1 = rasterizer->clip.payload[0]; + dw2 = rasterizer->clip.payload[1]; + dw3 = rasterizer->clip.payload[2]; + + if (enable_guardband && rasterizer->clip.can_enable_guardband) + dw2 |= GEN6_CLIP_GB_TEST; + + interps = (fs) ? ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; + + if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC | + 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC | + 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC)) + dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; + + dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX | + (num_viewports - 1); + } + else { + dw1 = 0; + dw2 = 0; + dw3 = 0; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, dw3); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); + const uint8_t cmd_len = 20; + uint32_t payload_raster[6], payload_sbe[13]; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, + 1, PIPE_FORMAT_NONE, 
payload_raster, Elements(payload_raster)); + ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, + fs, last_sh, payload_sbe, Elements(payload_sbe)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, payload_sbe[0]); + ilo_cp_write_multi(cp, payload_raster, 6); + ilo_cp_write_multi(cp, &payload_sbe[1], 12); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev, + const struct ilo_shader_state *fs, + int num_samplers, + const struct ilo_rasterizer_state *rasterizer, + bool dual_blend, bool cc_may_kill, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); + const uint8_t cmd_len = 9; + const int num_samples = 1; + const struct ilo_shader_cso *fs_cso; + uint32_t dw2, dw4, dw5, dw6; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + if (!fs) { + /* see brwCreateContext() */ + const int max_threads = (dev->gt == 2) ? 80 : 40; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + /* honor the valid range even if dispatching is disabled */ + ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); + + return; + } + + fs_cso = ilo_shader_get_kernel_cso(fs); + dw2 = fs_cso->payload[0]; + dw4 = fs_cso->payload[1]; + dw5 = fs_cso->payload[2]; + dw6 = fs_cso->payload[3]; + + dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT; + + if (true) { + dw4 |= GEN6_WM_STATISTICS_ENABLE; + } + else { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "This bit (Statistics Enable) must be disabled if either of these + * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer + * Resolve Enable or Depth Buffer Resolve Enable." 
+ */ + dw4 |= GEN6_WM_DEPTH_CLEAR; + dw4 |= GEN6_WM_DEPTH_RESOLVE; + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + } + + if (cc_may_kill) { + dw5 |= GEN6_WM_KILL_ENABLE | + GEN6_WM_DISPATCH_ENABLE; + } + + if (dual_blend) + dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE; + + dw5 |= rasterizer->wm.payload[0]; + + dw6 |= rasterizer->wm.payload[1]; + + if (num_samples > 1) { + dw6 |= rasterizer->wm.dw_msaa_rast | + rasterizer->wm.dw_msaa_disp; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); + ilo_cp_write(cp, dw2); + ilo_cp_write(cp, 0); /* scratch */ + ilo_cp_write(cp, dw4); + ilo_cp_write(cp, dw5); + ilo_cp_write(cp, dw6); + ilo_cp_write(cp, 0); /* kernel 1 */ + ilo_cp_write(cp, 0); /* kernel 2 */ + ilo_cp_end(cp); +} + +static inline unsigned +gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, int max_read_length, + uint32_t *dw, int num_dwords) +{ + unsigned enabled = 0x0; + int total_read_length, i; + + assert(num_dwords == 4); + + total_read_length = 0; + for (i = 0; i < 4; i++) { + if (i < num_bufs && sizes[i]) { + /* in 256-bit units minus one */ + const int read_len = (sizes[i] + 31) / 32 - 1; + + assert(bufs[i] % 32 == 0); + assert(read_len < 32); + + enabled |= 1 << i; + dw[i] = bufs[i] | read_len; + + total_read_length += read_len + 1; + } + else { + dw[i] = 0; + } + } + + assert(total_read_length <= max_read_length); + + return enabled; +} + +static inline void +gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15); + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 138: + * + * "The sum of all four read length fields (each incremented to + * 
represent the actual read length) must be less than or equal to 32" + */ + buf_enabled = gen6_fill_3dstate_constant(dev, + bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); + ilo_cp_write(cp, buf_dw[0]); + ilo_cp_write(cp, buf_dw[1]); + ilo_cp_write(cp, buf_dw[2]); + ilo_cp_write(cp, buf_dw[3]); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16); + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 161: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); + ilo_cp_write(cp, buf_dw[0]); + ilo_cp_write(cp, buf_dw[1]); + ilo_cp_write(cp, buf_dw[2]); + ilo_cp_write(cp, buf_dw[3]); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17); + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 287: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); + + ilo_cp_begin(cp, cmd_len); + 
ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12); + ilo_cp_write(cp, buf_dw[0]); + ilo_cp_write(cp, buf_dw[1]); + ilo_cp_write(cp, buf_dw[2]); + ilo_cp_write(cp, buf_dw[3]); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, + unsigned sample_mask, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); + const uint8_t cmd_len = 2; + const unsigned valid_mask = 0xf; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + sample_mask &= valid_mask; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, sample_mask); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, + unsigned x, unsigned y, + unsigned width, unsigned height, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00); + const uint8_t cmd_len = 4; + unsigned xmax = x + width - 1; + unsigned ymax = y + height - 1; + int rect_limit; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (dev->gen >= ILO_GEN(7)) { + rect_limit = 16383; + } + else { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 230: + * + * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) + * must be an even number" + */ + assert(y % 2 == 0); + + rect_limit = 8191; + } + + if (x > rect_limit) x = rect_limit; + if (y > rect_limit) y = rect_limit; + if (xmax > rect_limit) xmax = rect_limit; + if (ymax > rect_limit) ymax = rect_limit; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, y << 16 | x); + ilo_cp_write(cp, ymax << 16 | xmax); + + /* + * There is no need to set the origin. It is intended to support front + * buffer rendering. + */ + ilo_cp_write(cp, 0); + + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp) +{ + const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? 
+ ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05); + const uint8_t cmd_len = 7; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, zs->payload[0]); + ilo_cp_write_bo(cp, zs->payload[1], zs->bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_write(cp, zs->payload[2]); + ilo_cp_write(cp, zs->payload[3]); + ilo_cp_write(cp, zs->payload[4]); + ilo_cp_write(cp, zs->payload[5]); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev, + int x_offset, int y_offset, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert(x_offset >= 0 && x_offset <= 31); + assert(y_offset >= 0 && y_offset <= 31); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, x_offset << 8 | y_offset); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev, + const struct pipe_poly_stipple *pattern, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07); + const uint8_t cmd_len = 33; + int i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert(Elements(pattern->stipple) == 32); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + for (i = 0; i < 32; i++) + ilo_cp_write(cp, pattern->stipple[i]); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev, + unsigned pattern, unsigned factor, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08); + const uint8_t cmd_len = 3; + unsigned inverse; + + ILO_GPE_VALID_GEN(dev, 6, 7); + assert((pattern & 0xffff) == pattern); + assert(factor >= 1 && factor <= 256); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, pattern); + + if (dev->gen >= ILO_GEN(7)) { + /* in U1.16 */ + inverse = 
(unsigned) (65536.0f / factor); + ilo_cp_write(cp, inverse << 15 | factor); + } + else { + /* in U1.13 */ + inverse = (unsigned) (8192.0f / factor); + ilo_cp_write(cp, inverse << 16 | factor); + } + + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, 0 << 16 | 0); + ilo_cp_write(cp, 0 << 16 | 0); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev, + int index, unsigned svbi, + unsigned max_svbi, + bool load_vertex_count, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b); + const uint8_t cmd_len = 4; + uint32_t dw1; + + ILO_GPE_VALID_GEN(dev, 6, 6); + assert(index >= 0 && index < 4); + + dw1 = index << SVB_INDEX_SHIFT; + if (load_vertex_count) + dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, svbi); + ilo_cp_write(cp, max_svbi); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev, + int num_samples, + const uint32_t *packed_sample_pos, + bool pixel_location_center, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d); + const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3; + uint32_t dw1, dw2, dw3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw1 = (pixel_location_center) ? 
+ MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT; + + switch (num_samples) { + case 0: + case 1: + dw1 |= MS_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + case 4: + dw1 |= MS_NUMSAMPLES_4; + dw2 = packed_sample_pos[0]; + dw3 = 0; + break; + case 8: + assert(dev->gen >= ILO_GEN(7)); + dw1 |= MS_NUMSAMPLES_8; + dw2 = packed_sample_pos[0]; + dw3 = packed_sample_pos[1]; + break; + default: + assert(!"unsupported sample count"); + dw1 |= MS_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, dw2); + if (dev->gen >= ILO_GEN(7)) + ilo_cp_write(cp, dw3); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp) +{ + const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? + ILO_GPE_CMD(0x3, 0x0, 0x06) : + ILO_GPE_CMD(0x3, 0x1, 0x0e); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + /* see ilo_gpe_init_zs_surface() */ + ilo_cp_write(cp, zs->payload[6]); + ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev, + const struct ilo_zs_surface *zs, + struct ilo_cp *cp) +{ + const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? 
+ ILO_GPE_CMD(0x3, 0x0, 0x07) : + ILO_GPE_CMD(0x3, 0x1, 0x0f); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + /* see ilo_gpe_init_zs_surface() */ + ilo_cp_write(cp, zs->payload[8]); + ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, + uint32_t clear_val, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + GEN5_DEPTH_CLEAR_VALID); + ilo_cp_write(cp, clear_val); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev, + uint32_t dw1, + struct intel_bo *bo, uint32_t bo_offset, + bool write_qword, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00); + const uint8_t cmd_len = (write_qword) ? 
5 : 4; + const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; + const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (dw1 & PIPE_CONTROL_CS_STALL) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 73: + * + * "1 of the following must also be set (when CS stall is set): + * + * * Depth Cache Flush Enable ([0] of DW1) + * * Stall at Pixel Scoreboard ([1] of DW1) + * * Depth Stall ([13] of DW1) + * * Post-Sync Operation ([13] of DW1) + * * Render Target Cache Flush Enable ([12] of DW1) + * * Notify Enable ([8] of DW1)" + * + * From the Ivy Bridge PRM, volume 2 part 1, page 61: + * + * "One of the following must also be set (when CS stall is set): + * + * * Render Target Cache Flush Enable ([12] of DW1) + * * Depth Cache Flush Enable ([0] of DW1) + * * Stall at Pixel Scoreboard ([1] of DW1) + * * Depth Stall ([13] of DW1) + * * Post-Sync Operation ([13] of DW1)" + */ + uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_DEPTH_STALL; + + /* post-sync op */ + bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP; + + if (dev->gen == ILO_GEN(6)) + bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE; + + assert(dw1 & bit_test); + } + + if (dw1 & PIPE_CONTROL_DEPTH_STALL) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 73: + * + * "Following bits must be clear (when Depth Stall is set): + * + * * Render Target Cache Flush Enable ([12] of DW1) + * * Depth Cache Flush Enable ([0] of DW1)" + */ + assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH))); + } + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, dw1); + ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain); + ilo_cp_write(cp, 0); + if (write_qword) + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static inline void +gen6_emit_3DPRIMITIVE(const struct 
ilo_dev_info *dev, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); + const uint8_t cmd_len = 6; + const int prim = (rectlist) ? + _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? ib->draw_start_offset : 0); + + ILO_GPE_VALID_GEN(dev, 6, 6); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + vb_access); + ilo_cp_write(cp, info->count); + ilo_cp_write(cp, vb_start); + ilo_cp_write(cp, info->instance_count); + ilo_cp_write(cp, info->start_instance); + ilo_cp_write(cp, info->index_bias); + ilo_cp_end(cp); +} + +static inline uint32_t +gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev, + const struct ilo_shader_state **cs, + uint32_t *sampler_state, + int *num_samplers, + uint32_t *binding_table_state, + int *num_surfaces, + int num_ids, + struct ilo_cp *cp) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 2, page 34: + * + * "(Interface Descriptor Total Length) This field must have the same + * alignment as the Interface Descriptor Data Start Address. + * + * It must be DQWord (32-byte) aligned..." + * + * From the Sandy Bridge PRM, volume 2 part 2, page 35: + * + * "(Interface Descriptor Data Start Address) Specifies the 32-byte + * aligned address of the Interface Descriptor data." 
+ */ + const int state_align = 32 / 4; + const int state_len = (32 / 4) * num_ids; + uint32_t state_offset, *dw; + int i; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA", + state_len, state_align, &state_offset); + + for (i = 0; i < num_ids; i++) { + dw[0] = ilo_shader_get_kernel_offset(cs[i]); + dw[1] = 1 << 18; /* SPF */ + dw[2] = sampler_state[i] | + (num_samplers[i] + 3) / 4 << 2; + dw[3] = binding_table_state[i] | + num_surfaces[i]; + dw[4] = 0 << 16 | /* CURBE Read Length */ + 0; /* CURBE Read Offset */ + dw[5] = 0; /* Barrier ID */ + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } + + return state_offset; +} + +static inline uint32_t +gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 8 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 262: + * + * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is + * stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } + + return state_offset; +} + +static inline uint32_t +gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 4 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 193: + * + * "The viewport-related state is stored as an array of up to 16 + * elements..." + */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_gbx); + dw[1] = fui(vp->max_gbx); + dw[2] = fui(vp->min_gby); + dw[3] = fui(vp->max_gby); + + dw += 4; + } + + return state_offset; +} + +static inline uint32_t +gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 2 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 385: + * + * "The viewport state is stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT", + state_len, state_align, &state_offset); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_z); + dw[1] = fui(vp->max_z); + + dw += 2; + } + + return state_offset; +} + +static inline uint32_t +gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev, + const struct pipe_stencil_ref *stencil_ref, + float alpha_ref, + const struct pipe_blend_color *blend_color, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + const int state_len = 6; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE", + state_len, state_align, &state_offset); + + dw[0] = stencil_ref->ref_value[0] << 24 | + stencil_ref->ref_value[1] << 16 | + BRW_ALPHATEST_FORMAT_UNORM8; + dw[1] = float_to_ubyte(alpha_ref); + dw[2] = fui(blend_color->color[0]); + dw[3] = fui(blend_color->color[1]); + dw[4] = fui(blend_color->color[2]); + dw[5] = fui(blend_color->color[3]); + + return state_offset; +} + +/** + * Translate a pipe DSA test function to the matching hardware compare + * function. 
+ */ +static int +gen6_translate_dsa_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS; + default: + assert(!"unknown depth/stencil/alpha test function"); + return BRW_COMPAREFUNCTION_NEVER; + } +} + +static inline uint32_t +gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev, + const struct ilo_blend_state *blend, + const struct ilo_fb_state *fb, + const struct pipe_alpha_state *alpha, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + int state_len; + uint32_t state_offset, *dw; + unsigned num_targets, i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 376: + * + * "The blend state is stored as an array of up to 8 elements..." + */ + num_targets = fb->state.nr_cbufs; + assert(num_targets <= 8); + + if (!num_targets) { + if (!alpha->enabled) + return 0; + /* to be able to reference alpha func */ + num_targets = 1; + } + + state_len = 2 * num_targets; + + dw = ilo_cp_steal_ptr(cp, "BLEND_STATE", + state_len, state_align, &state_offset); + + for (i = 0; i < num_targets; i++) { + const unsigned idx = (blend->independent_blend_enable) ? i : 0; + const struct ilo_blend_cso *cso = &blend->cso[idx]; + const int num_samples = fb->num_samples; + const struct util_format_description *format_desc = + (idx < fb->state.nr_cbufs) ? 
+ util_format_description(fb->state.cbufs[idx]->format) : NULL; + bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; + + rt_is_unorm = true; + rt_is_pure_integer = false; + rt_dst_alpha_forced_one = false; + + if (format_desc) { + int ch; + + switch (format_desc->format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + /* force alpha to one when the HW format has alpha */ + assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM) + == BRW_SURFACEFORMAT_B8G8R8A8_UNORM); + rt_dst_alpha_forced_one = true; + break; + default: + break; + } + + for (ch = 0; ch < 4; ch++) { + if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) + continue; + + if (format_desc->channel[ch].pure_integer) { + rt_is_unorm = false; + rt_is_pure_integer = true; + break; + } + + if (!format_desc->channel[ch].normalized || + format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) + rt_is_unorm = false; + } + } + + dw[0] = cso->payload[0]; + dw[1] = cso->payload[1]; + + if (!rt_is_pure_integer) { + if (rt_dst_alpha_forced_one) + dw[0] |= cso->dw_blend_dst_alpha_forced_one; + else + dw[0] |= cso->dw_blend; + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding + * _SRGB variants), otherwise Logic Ops must be DISABLED." + * + * Since logicop is ignored for non-UNORM color buffers, no special care + * is needed. + */ + if (rt_is_unorm) + dw[1] |= cso->dw_logicop; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 356: + * + * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage + * Dither both must be disabled." + * + * There is no such limitation on GEN7, or for AlphaToOne. But GL + * requires that anyway. + */ + if (num_samples > 1) + dw[1] |= cso->dw_alpha_mod; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value." 
+ */ + if (alpha->enabled && !rt_is_pure_integer) { + dw[1] |= 1 << 16 | + gen6_translate_dsa_func(alpha->func) << 13; + } + + dw += 2; + } + + return state_offset; +} + +static inline uint32_t +gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev, + const struct ilo_dsa_state *dsa, + struct ilo_cp *cp) +{ + const int state_align = 64 / 4; + const int state_len = 3; + uint32_t state_offset, *dw; + + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE", + state_len, state_align, &state_offset); + + dw[0] = dsa->payload[0]; + dw[1] = dsa->payload[1]; + dw[2] = dsa->payload[2]; + + return state_offset; +} + +static inline uint32_t +gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev, + const struct ilo_scissor_state *scissor, + unsigned num_viewports, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 2 * num_viewports; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 263: + * + * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is + * stored as an array of up to 16 elements..." + */ + assert(num_viewports && num_viewports <= 16); + + dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT", + state_len, state_align, &state_offset); + + memcpy(dw, scissor->payload, state_len * 4); + + return state_offset; +} + +static inline uint32_t +gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev, + uint32_t *surface_states, + int num_surface_states, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = num_surface_states; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 69: + * + * "It is stored as an array of up to 256 elements..." 
+ */ + assert(num_surface_states <= 256); + + if (!num_surface_states) + return 0; + + dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE", + state_len, state_align, &state_offset); + memcpy(dw, surface_states, + num_surface_states * sizeof(surface_states[0])); + + return state_offset; +} + +static inline uint32_t +gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev, + const struct ilo_view_surface *surf, + bool for_render, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6; + uint32_t state_offset; + uint32_t read_domains, write_domain; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + if (for_render) { + read_domains = INTEL_DOMAIN_RENDER; + write_domain = INTEL_DOMAIN_RENDER; + } + else { + read_domains = INTEL_DOMAIN_SAMPLER; + write_domain = 0; + } + + ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset); + + STATIC_ASSERT(Elements(surf->payload) >= 8); + + ilo_cp_write(cp, surf->payload[0]); + ilo_cp_write_bo(cp, surf->payload[1], + surf->bo, read_domains, write_domain); + ilo_cp_write(cp, surf->payload[2]); + ilo_cp_write(cp, surf->payload[3]); + ilo_cp_write(cp, surf->payload[4]); + ilo_cp_write(cp, surf->payload[5]); + + if (dev->gen >= ILO_GEN(7)) { + ilo_cp_write(cp, surf->payload[6]); + ilo_cp_write(cp, surf->payload[7]); + } + + ilo_cp_end(cp); + + return state_offset; +} + +static inline uint32_t +gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev, + const struct pipe_stream_output_target *so, + const struct pipe_stream_output_info *so_info, + int so_index, + struct ilo_cp *cp) +{ + struct ilo_buffer *buf = ilo_buffer(so->buffer); + unsigned bo_offset, struct_size; + enum pipe_format elem_format; + struct ilo_view_surface surf; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; + + switch (so_info->output[so_index].num_components) { + 
case 1: + elem_format = PIPE_FORMAT_R32_FLOAT; + break; + case 2: + elem_format = PIPE_FORMAT_R32G32_FLOAT; + break; + case 3: + elem_format = PIPE_FORMAT_R32G32B32_FLOAT; + break; + case 4: + elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"unexpected SO components length"); + elem_format = PIPE_FORMAT_R32_FLOAT; + break; + } + + ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size, + struct_size, elem_format, false, true, &surf); + + return gen6_emit_SURFACE_STATE(dev, &surf, false, cp); +} + +static inline uint32_t +gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, + const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *views, + const uint32_t *sampler_border_colors, + int num_samplers, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = 4 * num_samplers; + uint32_t state_offset, *dw; + int i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 101: + * + * "The sampler state is stored as an array of up to 16 elements..." + */ + assert(num_samplers <= 16); + + if (!num_samplers) + return 0; + + dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE", + state_len, state_align, &state_offset); + + for (i = 0; i < num_samplers; i++) { + const struct ilo_sampler_cso *sampler = samplers[i]; + const struct pipe_sampler_view *view = views[i]; + const uint32_t border_color = sampler_border_colors[i]; + uint32_t dw_filter, dw_wrap; + + /* there may be holes */ + if (!sampler || !view) { + /* disabled sampler */ + dw[0] = 1 << 31; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw += 4; + + continue; + } + + /* determine filter and wrap modes */ + switch (view->texture->target) { + case PIPE_TEXTURE_1D: + dw_filter = (sampler->anisotropic) ? 
+ sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_1d; + break; + case PIPE_TEXTURE_3D: + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 103: + * + * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for + * surfaces of type SURFTYPE_3D." + */ + dw_filter = sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + case PIPE_TEXTURE_CUBE: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_cube; + break; + default: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + } + + dw[0] = sampler->payload[0]; + dw[1] = sampler->payload[1]; + assert(!(border_color & 0x1f)); + dw[2] = border_color; + dw[3] = sampler->payload[2]; + + dw[0] |= dw_filter; + + if (dev->gen >= ILO_GEN(7)) { + dw[3] |= dw_wrap; + } + else { + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 21: + * + * "[DevSNB] Errata: Incorrect behavior is observed in cases + * where the min and mag mode filters are different and + * SurfMinLOD is nonzero. The determination of MagMode uses the + * following equation instead of the one in the above + * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" + * + * As a way to work around that, we set Base to + * view->u.tex.first_level. + */ + dw[0] |= view->u.tex.first_level << 22; + + dw[1] |= dw_wrap; + } + + dw += 4; + } + + return state_offset; +} + +static inline uint32_t +gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, + const struct ilo_sampler_cso *sampler, + struct ilo_cp *cp) +{ + const int state_align = 32 / 4; + const int state_len = (dev->gen >= ILO_GEN(7)) ? 
4 : 12; + uint32_t state_offset, *dw; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", + state_len, state_align, &state_offset); + + /* see ilo_gpe_init_sampler_cso() */ + memcpy(dw, &sampler->payload[3], state_len * 4); + + return state_offset; +} + +static inline uint32_t +gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev, + int size, void **pcb, + struct ilo_cp *cp) +{ + /* + * For all VS, GS, FS, and CS push constant buffers, they must be aligned + * to 32 bytes, and their sizes are specified in 256-bit units. + */ + const int state_align = 32 / 4; + const int state_len = align(size, 32) / 4; + uint32_t state_offset; + char *buf; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER", + state_len, state_align, &state_offset); + + /* zero out the unused range */ + if (size < state_len * 4) + memset(&buf[size], 0, state_len * 4 - size); + + if (pcb) + *pcb = buf; + + return state_offset; +} #endif /* ILO_GPE_GEN6_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 2a590be2ddc..0af7eea0cb2 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -35,54 +35,6 @@ #include "ilo_shader.h" #include "ilo_gpe_gen7.h" -static void -gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - assert(!"GPGPU_WALKER unsupported"); -} - -static void -gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, - uint32_t clear_val, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04); - const uint8_t cmd_len = 3; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, clear_val); - ilo_cp_write(cp, 1); - ilo_cp_end(cp); -} - -static void -gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev, - int subop, uint32_t pointer, - struct ilo_cp *cp) -{ - const uint32_t cmd = 
ILO_GPE_CMD(0x3, 0x0, subop); - const uint8_t cmd_len = 2; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, pointer); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t color_calc_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp); -} - void ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, @@ -125,74 +77,6 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, cso->payload[2] = dw5; } -static void -gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *gs, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); - const uint8_t cmd_len = 7; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - if (!gs) { - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - return; - } - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct pipe_surface *zs_surf, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); - const uint8_t cmd_len = 7; - const int num_samples = 1; - uint32_t 
payload[6]; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - ilo_gpe_gen6_fill_3dstate_sf_raster(dev, - rasterizer, num_samples, - (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE, - payload, Elements(payload)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, payload, 6); - ilo_cp_end(cp); -} - void ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, const struct pipe_rasterizer_state *state, @@ -335,934 +219,6 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, cso->payload[3] = wm_dw1; } -static void -gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev, - const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer, - bool cc_may_kill, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); - const uint8_t cmd_len = 3; - const int num_samples = 1; - uint32_t dw1, dw2; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - /* see ilo_gpe_init_rasterizer_wm() */ - dw1 = rasterizer->wm.payload[0]; - dw2 = rasterizer->wm.payload[1]; - - dw1 |= GEN7_WM_STATISTICS_ENABLE; - - if (false) { - dw1 |= GEN7_WM_DEPTH_CLEAR; - dw1 |= GEN7_WM_DEPTH_RESOLVE; - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - } - - if (fs) { - const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); - - dw1 |= fs_cso->payload[3]; - } - - if (cc_may_kill) { - dw1 |= GEN7_WM_DISPATCH_ENABLE | - GEN7_WM_KILL_ENABLE; - } - - if (num_samples > 1) { - dw1 |= rasterizer->wm.dw_msaa_rast; - dw2 |= rasterizer->wm.dw_msaa_disp; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_end(cp); -} - -static void -gen7_emit_3dstate_constant(const struct ilo_dev_info *dev, - int subop, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); - const uint8_t cmd_len = 7; - uint32_t dw[6]; - int total_read_length, i; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - /* VS, HS, DS, 
GS, and PS variants */ - assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18); - - assert(num_bufs <= 4); - - dw[0] = 0; - dw[1] = 0; - - total_read_length = 0; - for (i = 0; i < 4; i++) { - int read_len; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 112: - * - * "Constant buffers must be enabled in order from Constant Buffer 0 - * to Constant Buffer 3 within this command. For example, it is - * not allowed to enable Constant Buffer 1 by programming a - * non-zero value in the VS Constant Buffer 1 Read Length without a - * non-zero value in VS Constant Buffer 0 Read Length." - */ - if (i >= num_bufs || !sizes[i]) { - for (; i < 4; i++) { - assert(i >= num_bufs || !sizes[i]); - dw[2 + i] = 0; - } - break; - } - - /* read lengths are in 256-bit units */ - read_len = (sizes[i] + 31) / 32; - /* the lower 5 bits are used for memory object control state */ - assert(bufs[i] % 32 == 0); - - dw[i / 2] |= read_len << ((i % 2) ? 16 : 0); - dw[2 + i] = bufs[i]; - - total_read_length += read_len; - } - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 113: - * - * "The sum of all four read length fields must be less than or equal - * to the size of 64" - */ - assert(total_read_length <= 64); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, dw, 6); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp); -} - -static void -gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp); -} - -static void -gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - 
gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp); -} - -static void -gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, - unsigned sample_mask, - int num_samples, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18); - const uint8_t cmd_len = 2; - const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 294: - * - * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field - * (Sample Mask) must be zero. - * - * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field - * must be zero." - */ - sample_mask &= valid_mask; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, sample_mask); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp); -} - -static void -gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp); -} - -static void -gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *hs, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b); - const uint8_t cmd_len = 7; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - assert(!hs); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c); - const uint8_t cmd_len = 4; - - ILO_GPE_VALID_GEN(dev, 7, 7); - 
- ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *ds, - int num_samplers, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d); - const uint8_t cmd_len = 6; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - assert(!ds); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - -} - -static void -gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev, - unsigned buffer_mask, - int vertex_attrib_count, - bool rasterizer_discard, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e); - const uint8_t cmd_len = 3; - const bool enable = (buffer_mask != 0); - uint32_t dw1, dw2; - int read_len; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - if (!enable) { - dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT; - if (rasterizer_discard) - dw1 |= SO_RENDERING_DISABLE; - - dw2 = 0; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_end(cp); - return; - } - - read_len = (vertex_attrib_count + 1) / 2; - if (!read_len) - read_len = 1; - - dw1 = SO_FUNCTION_ENABLE | - 0 << SO_RENDER_STREAM_SELECT_SHIFT | - SO_STATISTICS_ENABLE | - buffer_mask << 8; - - if (rasterizer_discard) - dw1 |= SO_RENDERING_DISABLE; - - /* API_OPENGL */ - if (true) - dw1 |= SO_REORDER_TRAILING; - - dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT | - 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT | - 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT | - 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT | - 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT | - 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT | - 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT | - (read_len - 1) << 
SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, dw2); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - const struct ilo_shader_state *last_sh, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f); - const uint8_t cmd_len = 14; - uint32_t dw[13]; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, - fs, last_sh, dw, Elements(dw)); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write_multi(cp, dw, 13); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev, - const struct ilo_shader_state *fs, - int num_samplers, bool dual_blend, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20); - const uint8_t cmd_len = 8; - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - if (!fs) { - /* see brwCreateContext() */ - const int max_threads = (dev->gt == 2) ? 
172 : 48; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - /* GPU hangs if none of the dispatch enable bits is set */ - ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT | - GEN7_PS_8_DISPATCH_ENABLE); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - - return; - } - - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT; - - if (dual_blend) - dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs)); - ilo_cp_write(cp, dw2); - ilo_cp_write(cp, 0); /* scratch */ - ilo_cp_write(cp, dw4); - ilo_cp_write(cp, dw5); - ilo_cp_write(cp, 0); /* kernel 1 */ - ilo_cp_write(cp, 0); /* kernel 2 */ - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev, - uint32_t sf_clip_viewport, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp); -} - -static void -gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev, - uint32_t cc_viewport, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp); -} - -static void -gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t blend_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp); -} - -static void -gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev, - uint32_t depth_stencil_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp); -} - -static void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 
0x26, binding_table, cp); -} - -static void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp); -} - -static void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp); -} - -static void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp); -} - -static void -gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev, - uint32_t binding_table, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp); -} - -static void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp); -} - -static void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp); -} - -static void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp); -} - -static void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp); -} - -static void -gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev, - uint32_t sampler_state, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp); -} - -static void -gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, - int subop, int offset, int size, - int entry_size, - struct 
ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); - const uint8_t cmd_len = 2; - const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries, min_entries, max_entries; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - /* VS, HS, DS, and GS variants */ - assert(subop >= 0x30 && subop <= 0x33); - - /* in multiples of 8KB */ - assert(offset % 8192 == 0); - offset /= 8192; - - /* in multiple of 512-bit rows */ - alloc_size = (entry_size + row_size - 1) / row_size; - if (!alloc_size) - alloc_size = 1; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 34: - * - * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may - * cause performance to decrease due to banking in the URB. Element - * sizes of 16 to 20 should be programmed with six 512-bit URB rows." - */ - if (subop == 0x30 && alloc_size == 5) - alloc_size = 6; - - /* in multiples of 8 */ - num_entries = (size / row_size / alloc_size) & ~7; - - switch (subop) { - case 0x30: /* 3DSTATE_URB_VS */ - min_entries = 32; - max_entries = (dev->gt == 2) ? 704 : 512; - - assert(num_entries >= min_entries); - if (num_entries > max_entries) - num_entries = max_entries; - break; - case 0x31: /* 3DSTATE_URB_HS */ - max_entries = (dev->gt == 2) ? 64 : 32; - if (num_entries > max_entries) - num_entries = max_entries; - break; - case 0x32: /* 3DSTATE_URB_DS */ - if (num_entries) - assert(num_entries >= 138); - break; - case 0x33: /* 3DSTATE_URB_GS */ - max_entries = (dev->gt == 2) ? 
320 : 192; - if (num_entries > max_entries) - num_entries = max_entries; - break; - default: - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT | - (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | - num_entries); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp); -} - -static void -gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp); -} - -static void -gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp); -} - -static void -gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev, - int offset, int size, int entry_size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp); -} - -static void -gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev, - int subop, int offset, int size, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop); - const uint8_t cmd_len = 2; - int end; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= 0x12 && subop <= 0x16); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 68: - * - * "(A table that says the maximum size of each constant buffer is - * 16KB") - * - * From the Ivy Bridge PRM, volume 2 part 1, page 115: - * - * "The sum of the Constant Buffer Offset and the Constant Buffer Size - * may not exceed the maximum value of the Constant Buffer Size." - * - * Thus, the valid range of buffer end is [0KB, 16KB]. 
- */ - end = (offset + size) / 1024; - if (end > 16) { - assert(!"invalid constant buffer end"); - end = 16; - } - - /* the valid range of buffer offset is [0KB, 15KB] */ - offset = (offset + 1023) / 1024; - if (offset > 15) { - assert(!"invalid constant buffer offset"); - offset = 15; - } - - if (offset > end) { - assert(!size); - offset = end; - } - - /* the valid range of buffer size is [0KB, 15KB] */ - size = end - offset; - if (size > 15) { - assert(!"invalid constant buffer size"); - size = 15; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT | - size); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp); -} - -static void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp); -} - -static void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp); -} - -static void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp); -} - -static void -gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev, - int offset, int size, - struct ilo_cp *cp) -{ - gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp); -} - -static void -gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, - const struct pipe_stream_output_info *so_info, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17); - uint16_t cmd_len; - int buffer_selects, num_entries, i; - 
uint16_t so_decls[128]; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - buffer_selects = 0; - num_entries = 0; - - if (so_info) { - int buffer_offsets[PIPE_MAX_SO_BUFFERS]; - - memset(buffer_offsets, 0, sizeof(buffer_offsets)); - - for (i = 0; i < so_info->num_outputs; i++) { - unsigned decl, buf, reg, mask; - - buf = so_info->output[i].output_buffer; - - /* pad with holes */ - assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); - while (buffer_offsets[buf] < so_info->output[i].dst_offset) { - int num_dwords; - - num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; - if (num_dwords > 4) - num_dwords = 4; - - decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | - SO_DECL_HOLE_FLAG | - ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT; - - so_decls[num_entries++] = decl; - buffer_offsets[buf] += num_dwords; - } - - reg = so_info->output[i].register_index; - mask = ((1 << so_info->output[i].num_components) - 1) << - so_info->output[i].start_component; - - decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | - reg << SO_DECL_REGISTER_INDEX_SHIFT | - mask << SO_DECL_COMPONENT_MASK_SHIFT; - - so_decls[num_entries++] = decl; - buffer_selects |= 1 << buf; - buffer_offsets[buf] += so_info->output[i].num_components; - } - } - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 201: - * - * "Errata: All 128 decls for all four streams must be included - * whenever this command is issued. The "Num Entries [n]" fields still - * contain the actual numbers of valid decls." - * - * Also note that "DWord Length" has 9 bits for this command, and the type - * of cmd_len is thus uint16_t. 
- */ - cmd_len = 2 * 128 + 3; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT | - 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT | - 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT | - buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT); - ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT | - 0 << SO_NUM_ENTRIES_2_SHIFT | - 0 << SO_NUM_ENTRIES_1_SHIFT | - num_entries << SO_NUM_ENTRIES_0_SHIFT); - - for (i = 0; i < num_entries; i++) { - ilo_cp_write(cp, so_decls[i]); - ilo_cp_write(cp, 0); - } - for (; i < 128; i++) { - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - } - - ilo_cp_end(cp); -} - -static void -gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, - int index, int base, int stride, - const struct pipe_stream_output_target *so_target, - struct ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18); - const uint8_t cmd_len = 4; - struct ilo_buffer *buf; - int end; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - if (!so_target || !so_target->buffer) { - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); - return; - } - - buf = ilo_buffer(so_target->buffer); - - /* DWord-aligned */ - assert(stride % 4 == 0 && base % 4 == 0); - assert(so_target->buffer_offset % 4 == 0); - - stride &= ~3; - base = (base + so_target->buffer_offset) & ~3; - end = (base + so_target->buffer_size) & ~3; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT | - stride); - ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_end(cp); -} - -static void -gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist, - struct 
ilo_cp *cp) -{ - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); - const uint8_t cmd_len = 7; - const int prim = (rectlist) ? - _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : - GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); - - ILO_GPE_VALID_GEN(dev, 7, 7); - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, vb_access | prim); - ilo_cp_write(cp, info->count); - ilo_cp_write(cp, vb_start); - ilo_cp_write(cp, info->instance_count); - ilo_cp_write(cp, info->start_instance); - ilo_cp_write(cp, info->index_bias); - ilo_cp_end(cp); -} - -static uint32_t -gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports, - struct ilo_cp *cp) -{ - const int state_align = 64 / 4; - const int state_len = 16 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_GPE_VALID_GEN(dev, 7, 7); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 270: - * - * "The viewport-specific state used by both the SF and CL units - * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each - * of which contains the DWords described below. The start of each - * element is spaced 16 DWords apart. The location of first element of - * the array, as specified by both Pointer to SF_VIEWPORT and Pointer - * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." 
- */ - assert(num_viewports && num_viewports <= 16); - - dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT", - state_len, state_align, &state_offset); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - dw[8] = fui(vp->min_gbx); - dw[9] = fui(vp->max_gbx); - dw[10] = fui(vp->min_gby); - dw[11] = fui(vp->max_gby); - dw[12] = 0; - dw[13] = 0; - dw[14] = 0; - dw[15] = 0; - - dw += 16; - } - - return state_offset; -} - void ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev, unsigned width, unsigned height, diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h index 32120154886..e9ddf18a284 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -490,4 +490,1048 @@ struct ilo_gpe_gen7 { const struct ilo_gpe_gen7 * ilo_gpe_gen7_get(void); +static inline void +gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev, + struct ilo_cp *cp) +{ + assert(!"GPGPU_WALKER unsupported"); +} + +static inline void +gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, + uint32_t clear_val, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04); + const uint8_t cmd_len = 3; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, clear_val); + ilo_cp_write(cp, 1); + ilo_cp_end(cp); +} + +static inline void +gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev, + int subop, uint32_t pointer, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 7, 7); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2)); + ilo_cp_write(cp, pointer); + ilo_cp_end(cp); +} + +static inline void 
+gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                    uint32_t color_calc_state,
+                                    struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
+}
+/**
+ * Write the 7-dword 3DSTATE_GS command (subop 0x11) to cp.
+ *
+ * With a NULL gs, every dword after the header is zero except for
+ * GEN6_GS_STATISTICS_ENABLE.  Otherwise dw2, dw4 and dw5 are taken from the
+ * kernel CSO payload, and the sampler count, rounded up to units of four
+ * samplers, is OR'ed into dw2.
+ */
+static inline void
+gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *gs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
+   const uint8_t cmd_len = 7;
+   const struct ilo_shader_cso *cso;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (!gs) {
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, cmd | (cmd_len - 2));
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+      return;
+   }
+
+   cso = ilo_shader_get_kernel_cso(gs);
+   dw2 = cso->payload[0];
+   dw4 = cso->payload[1];
+   dw5 = cso->payload[2];
+
+   dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, dw5);
+   ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 7-dword 3DSTATE_SF command (subop 0x13) to cp.
+ *
+ * The six payload dwords are filled by ilo_gpe_gen6_fill_3dstate_sf_raster().
+ * The format passed to that helper is zs_surf->format, or PIPE_FORMAT_NONE
+ * when zs_surf is NULL.  The sample count is fixed at 1 here.
+ */
+static inline void
+gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     const struct pipe_surface *zs_surf,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
+   const uint8_t cmd_len = 7;
+   const int num_samples = 1;
+   uint32_t payload[6];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
+         rasterizer, num_samples,
+         (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
+         payload, Elements(payload));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write_multi(cp, payload, 6); /* 6 == Elements(payload) */
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 3-dword 3DSTATE_WM command (subop 0x14) to cp.
+ *
+ * dw1 and dw2 start from rasterizer->wm.payload[].  Statistics are always
+ * enabled.  When fs is non-NULL, payload[3] of its kernel CSO is OR'ed into
+ * dw1.  When cc_may_kill is set, both GEN7_WM_DISPATCH_ENABLE and
+ * GEN7_WM_KILL_ENABLE are set.
+ */
+static inline void
+gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     bool cc_may_kill,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
+   const uint8_t cmd_len = 3;
+   const int num_samples = 1;
+   uint32_t dw1, dw2;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* see ilo_gpe_init_rasterizer_wm() */
+   dw1 = rasterizer->wm.payload[0];
+   dw2 = rasterizer->wm.payload[1];
+
+   dw1 |= GEN7_WM_STATISTICS_ENABLE;
+
+   if (false) { /* never executed; the three flags below are never set here */
+      dw1 |= GEN7_WM_DEPTH_CLEAR;
+      dw1 |= GEN7_WM_DEPTH_RESOLVE;
+      dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+   }
+
+   if (fs) {
+      const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
+
+      dw1 |= fs_cso->payload[3];
+   }
+
+   if (cc_may_kill) {
+      dw1 |= GEN7_WM_DISPATCH_ENABLE |
+             GEN7_WM_KILL_ENABLE;
+   }
+
+   if (num_samples > 1) { /* not taken: num_samples is the constant 1 above */
+      dw1 |= rasterizer->wm.dw_msaa_rast;
+      dw2 |= rasterizer->wm.dw_msaa_disp;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+/**
+ * Shared helper that writes a 7-dword 3DSTATE_CONSTANT_* command to cp.
+ *
+ * subop selects the variant (0x15 to 0x1a, excluding 0x18).  bufs[i] and
+ * sizes[i] describe up to four constant buffers: sizes are in bytes and are
+ * converted to 256-bit read lengths, two packed per dword in dw[0] and dw[1];
+ * bufs[i] is written unchanged into dw[2 + i] and must be a multiple of 32.
+ * Processing stops at the first index that is >= num_bufs or has a zero
+ * size; the remaining buffer dwords are zeroed.
+ */
+static inline void
+gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
+                           int subop,
+                           const uint32_t *bufs, const int *sizes,
+                           int num_bufs,
+                           struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
+   const uint8_t cmd_len = 7;
+   uint32_t dw[6];
+   int total_read_length, i;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, GS, and PS variants */
+   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
+
+   assert(num_bufs <= 4);
+
+   dw[0] = 0;
+   dw[1] = 0;
+
+   total_read_length = 0;
+   for (i = 0; i < 4; i++) {
+      int read_len;
+
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 112:
+       *
+       *     "Constant buffers must be enabled in order from Constant Buffer 0
+       *      to Constant Buffer 3 within this command. For example, it is
+       *      not allowed to enable Constant Buffer 1 by programming a
+       *      non-zero value in the VS Constant Buffer 1 Read Length without a
+       *      non-zero value in VS Constant Buffer 0 Read Length."
+       */
+      if (i >= num_bufs || !sizes[i]) {
+         for (; i < 4; i++) {
+            assert(i >= num_bufs || !sizes[i]);
+            dw[2 + i] = 0;
+         }
+         break;
+      }
+
+      /* read lengths are in 256-bit units */
+      read_len = (sizes[i] + 31) / 32;
+      /* the lower 5 bits are used for memory object control state */
+      assert(bufs[i] % 32 == 0);
+
+      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0); /* even i: low half; odd i: high half */
+      dw[2 + i] = bufs[i];
+
+      total_read_length += read_len;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+    *
+    *     "The sum of all four read length fields must be less than or equal
+    *      to the size of 64"
+    */
+   assert(total_read_length <= 64);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write_multi(cp, dw, 6);
+   ilo_cp_end(cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_constant() with subop 0x15. */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_constant() with subop 0x16. */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_constant() with subop 0x17. */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
+}
+/**
+ * Write the 2-dword 3DSTATE_SAMPLE_MASK command (subop 0x18) to cp.
+ *
+ * sample_mask is first restricted to the low num_samples bits; bit 0 is
+ * always kept valid.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+                              unsigned sample_mask,
+                              int num_samples,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
+   const uint8_t cmd_len = 2;
+   /* NOTE(review): the shift is unchecked; assumes 0 <= num_samples < 31 -- confirm against callers */
+   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+    *      (Sample Mask) must be zero.
+    *
+    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+    *      must be zero."
+    */
+   sample_mask &= valid_mask;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, sample_mask);
+   ilo_cp_end(cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_constant() with subop 0x19. */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_constant() with subop 0x1a. */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
+}
+/**
+ * Write the 7-dword 3DSTATE_HS command (subop 0x1b) to cp with an all-zero
+ * body.
+ *
+ * hs is asserted to be NULL, and num_samplers is not used.
+ */
+static inline void
+gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *hs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
+   const uint8_t cmd_len = 7;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   assert(!hs);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 4-dword 3DSTATE_TE command (subop 0x1c) to cp with an all-zero
+ * body.
+ */
+static inline void
+gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
+   const uint8_t cmd_len = 4;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 6-dword 3DSTATE_DS command (subop 0x1d) to cp with an all-zero
+ * body.
+ *
+ * ds is asserted to be NULL, and num_samplers is not used.
+ */
+static inline void
+gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *ds,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
+   const uint8_t cmd_len = 6;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   assert(!ds);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+
+}
+/**
+ * Write the 3-dword 3DSTATE_STREAMOUT command (subop 0x1e) to cp.
+ *
+ * Stream output is enabled only when buffer_mask is non-zero.  When it is
+ * disabled, the only bit that may be set is SO_RENDERING_DISABLE (if
+ * rasterizer_discard).  When enabled, buffer_mask is placed at bit 8 of dw1
+ * and the stream 0 vertex read length is derived from vertex_attrib_count
+ * (two attributes per unit, rounded up, at least one unit).  All fields for
+ * streams 1 to 3 are written as zero.
+ */
+static inline void
+gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
+                            unsigned buffer_mask,
+                            int vertex_attrib_count,
+                            bool rasterizer_discard,
+                            struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
+   const uint8_t cmd_len = 3;
+   const bool enable = (buffer_mask != 0);
+   uint32_t dw1, dw2;
+   int read_len;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (!enable) {
+      dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
+      if (rasterizer_discard)
+         dw1 |= SO_RENDERING_DISABLE;
+
+      dw2 = 0;
+
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, cmd | (cmd_len - 2));
+      ilo_cp_write(cp, dw1);
+      ilo_cp_write(cp, dw2);
+      ilo_cp_end(cp);
+      return;
+   }
+
+   read_len = (vertex_attrib_count + 1) / 2;
+   if (!read_len)
+      read_len = 1;
+
+   dw1 = SO_FUNCTION_ENABLE |
+         0 << SO_RENDER_STREAM_SELECT_SHIFT |
+         SO_STATISTICS_ENABLE |
+         buffer_mask << 8;
+
+   if (rasterizer_discard)
+      dw1 |= SO_RENDERING_DISABLE;
+
+   /* API_OPENGL */
+   if (true)
+      dw1 |= SO_REORDER_TRAILING;
+
+   dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
+         0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
+         0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
+         0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
+         0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
+         0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
+         0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
+         (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; /* the field stores length minus one */
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 14-dword 3DSTATE_SBE command (subop 0x1f) to cp.
+ *
+ * The 13 payload dwords are filled by ilo_gpe_gen6_fill_3dstate_sf_sbe()
+ * from rasterizer, fs and last_sh.
+ */
+static inline void
+gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
+                      const struct ilo_rasterizer_state *rasterizer,
+                      const struct ilo_shader_state *fs,
+                      const struct ilo_shader_state *last_sh,
+                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
+   const uint8_t cmd_len = 14;
+   uint32_t dw[13];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+         fs, last_sh, dw, Elements(dw));
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write_multi(cp, dw, 13); /* 13 == Elements(dw) */
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 8-dword 3DSTATE_PS command (subop 0x20) to cp.
+ *
+ * With a NULL fs, only the maximum thread count (chosen by dev->gt) and
+ * GEN7_PS_8_DISPATCH_ENABLE are programmed.  Otherwise dw2, dw4 and dw5 come
+ * from the kernel CSO payload; the sampler count (in units of four, rounded
+ * up) is OR'ed into dw2, and GEN7_PS_DUAL_SOURCE_BLEND_ENABLE into dw4 when
+ * dual_blend is set.
+ */
+static inline void
+gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     int num_samplers, bool dual_blend,
+                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
+   const uint8_t cmd_len = 8;
+   const struct ilo_shader_cso *cso;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (!fs) {
+      /* see brwCreateContext() */
+      const int max_threads = (dev->gt == 2) ? 172 : 48;
+
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, cmd | (cmd_len - 2));
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      /* GPU hangs if none of the dispatch enable bits is set */
+      ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
+                       GEN7_PS_8_DISPATCH_ENABLE);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+
+      return;
+   }
+
+   cso = ilo_shader_get_kernel_cso(fs);
+   dw2 = cso->payload[0];
+   dw4 = cso->payload[1];
+   dw5 = cso->payload[2];
+
+   dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* division binds tighter than the shift */
+
+   if (dual_blend)
+      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, dw5);
+   ilo_cp_write(cp, 0); /* kernel 1 */
+   ilo_cp_write(cp, 0); /* kernel 2 */
+   ilo_cp_end(cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x21. */
+static inline void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
+                                                  uint32_t sf_clip_viewport,
+                                                  struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x23. */
+static inline void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
+                                             uint32_t cc_viewport,
+                                             struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x24. */
+static inline void
+gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                       uint32_t blend_state,
+                                       struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x25. */
+static inline void
+gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                               uint32_t depth_stencil_state,
+                                               struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x26. */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x27. */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x28. */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x29. */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x2a. */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x2b. */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x2c. */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x2d. */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x2e. */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_pointer() with subop 0x2f. */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
+}
+/**
+ * Shared helper that writes a 2-dword 3DSTATE_URB_* command to cp.
+ *
+ * subop selects the variant (0x30 to 0x33).  offset is in bytes and must be
+ * a multiple of 8192; size and entry_size are in bytes.  entry_size is
+ * rounded up to 64-byte rows (at least one row), and the entry count is
+ * size / 64 / rows rounded down to a multiple of 8, then clamped to a
+ * per-variant maximum that depends on dev->gt (the 0x32 variant has no
+ * clamp, only an assertion on the minimum).
+ */
+static inline void
+gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
+                      int subop, int offset, int size,
+                      int entry_size,
+                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
+   const uint8_t cmd_len = 2;
+   const int row_size = 64; /* 512 bits */
+   int alloc_size, num_entries, min_entries, max_entries;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, and GS variants */
+   assert(subop >= 0x30 && subop <= 0x33);
+
+   /* in multiples of 8KB */
+   assert(offset % 8192 == 0);
+   offset /= 8192;
+
+   /* in multiples of 512-bit rows */
+   alloc_size = (entry_size + row_size - 1) / row_size;
+   if (!alloc_size)
+      alloc_size = 1;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    *
+    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+    *      cause performance to decrease due to banking in the URB. Element
+    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+    */
+   if (subop == 0x30 && alloc_size == 5)
+      alloc_size = 6;
+
+   /* in multiples of 8 */
+   num_entries = (size / row_size / alloc_size) & ~7;
+
+   switch (subop) {
+   case 0x30: /* 3DSTATE_URB_VS */
+      min_entries = 32;
+      max_entries = (dev->gt == 2) ? 704 : 512;
+
+      assert(num_entries >= min_entries);
+      if (num_entries > max_entries)
+         num_entries = max_entries;
+      break;
+   case 0x31: /* 3DSTATE_URB_HS */
+      max_entries = (dev->gt == 2) ? 64 : 32;
+      if (num_entries > max_entries)
+         num_entries = max_entries;
+      break;
+   case 0x32: /* 3DSTATE_URB_DS */
+      if (num_entries)
+         assert(num_entries >= 138);
+      break;
+   case 0x33: /* 3DSTATE_URB_GS */
+      max_entries = (dev->gt == 2) ? 320 : 192;
+      if (num_entries > max_entries)
+         num_entries = max_entries;
+      break;
+   default:
+      break;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
+                    (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+                    num_entries);
+   ilo_cp_end(cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_urb() with subop 0x30. */
+static inline void
+gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_urb() with subop 0x31. */
+static inline void
+gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_urb() with subop 0x32. */
+static inline void
+gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_urb() with subop 0x33. */
+static inline void
+gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
+}
+/**
+ * Shared helper that writes a 2-dword 3DSTATE_PUSH_CONSTANT_ALLOC_* command
+ * to cp.
+ *
+ * subop selects the variant (0x12 to 0x16).  offset and size are in bytes.
+ * The range is converted to 1KB units with the start rounded up and the end
+ * rounded down, then clamped so that the end is at most 16, the start at
+ * most 15 and the size at most 15.  Each clamp also trips an assertion in
+ * debug builds.
+ */
+static inline void
+gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
+                                      int subop, int offset, int size,
+                                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
+   const uint8_t cmd_len = 2;
+   int end;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, GS, and PS variants */
+   assert(subop >= 0x12 && subop <= 0x16);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+    *
+    *     "(A table that says the maximum size of each constant buffer is
+    *      16KB)"
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+    *
+    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
+    *      may not exceed the maximum value of the Constant Buffer Size."
+    *
+    * Thus, the valid range of buffer end is [0KB, 16KB].
+    */
+   end = (offset + size) / 1024;
+   if (end > 16) {
+      assert(!"invalid constant buffer end");
+      end = 16;
+   }
+
+   /* the valid range of buffer offset is [0KB, 15KB] */
+   offset = (offset + 1023) / 1024;
+   if (offset > 15) {
+      assert(!"invalid constant buffer offset");
+      offset = 15;
+   }
+
+   if (offset > end) { /* rounding start up and end down can cross; only a zero size is expected here */
+      assert(!size);
+      offset = end;
+   }
+
+   /* the valid range of buffer size is [0KB, 15KB] */
+   size = end - offset;
+   if (size > 15) {
+      assert(!"invalid constant buffer size");
+      size = 15;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
+                    size);
+   ilo_cp_end(cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_push_constant_alloc() with subop 0x12. */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_push_constant_alloc() with subop 0x13. */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_push_constant_alloc() with subop 0x14. */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_push_constant_alloc() with subop 0x15. */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
+}
+/** Thin wrapper: gen7_emit_3dstate_push_constant_alloc() with subop 0x16. */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
+}
+/**
+ * Write the 3DSTATE_SO_DECL_LIST command (subop 0x17, 2 * 128 + 3 dwords)
+ * to cp.
+ *
+ * One declaration is built for each output in so_info, preceded by "hole"
+ * declarations of up to four dwords each wherever an output's dst_offset
+ * lies beyond the running offset of its buffer.  All declarations are
+ * programmed for stream 0; the other three streams get zero buffer selects
+ * and zero entry counts.  The list is always padded with zeros to 128
+ * entries.  A NULL so_info produces an empty list.
+ */
+static inline void
+gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
+                               const struct pipe_stream_output_info *so_info,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
+   uint16_t cmd_len;
+   int buffer_selects, num_entries, i;
+   /* NOTE(review): decls are stored as 16-bit values; presumably every SO_DECL_* field fits -- confirm */
+   uint16_t so_decls[128];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   buffer_selects = 0;
+   num_entries = 0;
+
+   if (so_info) {
+      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+
+      memset(buffer_offsets, 0, sizeof(buffer_offsets));
+
+      for (i = 0; i < so_info->num_outputs; i++) {
+         unsigned decl, buf, reg, mask;
+
+         /* NOTE(review): buf indexes buffer_offsets[] without a range check here */
+         buf = so_info->output[i].output_buffer;
+
+         /* pad with holes */
+         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
+         while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
+            int num_dwords;
+
+            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
+            if (num_dwords > 4)
+               num_dwords = 4;
+
+            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                   SO_DECL_HOLE_FLAG |
+                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
+
+            /* NOTE(review): num_entries is not checked against the 128-entry array */
+            so_decls[num_entries++] = decl;
+            buffer_offsets[buf] += num_dwords;
+         }
+
+         reg = so_info->output[i].register_index;
+         mask = ((1 << so_info->output[i].num_components) - 1) <<
+            so_info->output[i].start_component;
+
+         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                reg << SO_DECL_REGISTER_INDEX_SHIFT |
+                mask << SO_DECL_COMPONENT_MASK_SHIFT;
+
+         so_decls[num_entries++] = decl;
+         buffer_selects |= 1 << buf;
+         buffer_offsets[buf] += so_info->output[i].num_components;
+      }
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+    *
+    *     "Errata: All 128 decls for all four streams must be included
+    *      whenever this command is issued. The "Num Entries [n]" fields still
+    *      contain the actual numbers of valid decls."
+    *
+    * Also note that "DWord Length" has 9 bits for this command, and the type
+    * of cmd_len is thus uint16_t.
+    */
+   cmd_len = 2 * 128 + 3;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
+                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
+   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
+                    0 << SO_NUM_ENTRIES_2_SHIFT |
+                    0 << SO_NUM_ENTRIES_1_SHIFT |
+                    num_entries << SO_NUM_ENTRIES_0_SHIFT);
+
+   for (i = 0; i < num_entries; i++) { /* each entry occupies two dwords; the second is written as 0 */
+      ilo_cp_write(cp, so_decls[i]);
+      ilo_cp_write(cp, 0);
+   }
+   for (; i < 128; i++) { /* zero padding up to the fixed 128 entries */
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+   }
+
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 4-dword 3DSTATE_SO_BUFFER command (subop 0x18) to cp.
+ *
+ * With a NULL so_target, or a target without a buffer, only the buffer index
+ * is programmed and both address dwords are zero.  Otherwise the start is
+ * base + so_target->buffer_offset and the end is start +
+ * so_target->buffer_size; both are emitted as offsets into the buffer's bo
+ * through ilo_cp_write_bo().  stride, start and end are masked down to
+ * multiples of 4.
+ */
+static inline void
+gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
+                            int index, int base, int stride,
+                            const struct pipe_stream_output_target *so_target,
+                            struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
+   const uint8_t cmd_len = 4;
+   struct ilo_buffer *buf;
+   int end;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (!so_target || !so_target->buffer) {
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, cmd | (cmd_len - 2));
+      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+      return;
+   }
+
+   buf = ilo_buffer(so_target->buffer);
+
+   /* DWord-aligned */
+   assert(stride % 4 == 0 && base % 4 == 0);
+   assert(so_target->buffer_offset % 4 == 0);
+
+   stride &= ~3;
+   base = (base + so_target->buffer_offset) & ~3;
+   end = (base + so_target->buffer_size) & ~3; /* base already includes buffer_offset at this point */
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
+                    stride);
+   ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+/**
+ * Write the 7-dword 3DPRIMITIVE command to cp.
+ *
+ * The primitive type is _3DPRIM_RECTLIST when rectlist is set, otherwise the
+ * translation of info->mode.  For indexed draws the vertex buffer access is
+ * random and ib->draw_start_offset is added to info->start; ib is only
+ * dereferenced when info->indexed is set.
+ */
+static inline void
+gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+                      const struct pipe_draw_info *info,
+                      const struct ilo_ib_state *ib,
+                      bool rectlist,
+                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
+   const uint8_t cmd_len = 7;
+   const int prim = (rectlist) ?
+      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
+   const int vb_access = (info->indexed) ?
+      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
+      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+   const uint32_t vb_start = info->start +
+      ((info->indexed) ? ib->draw_start_offset : 0);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, vb_access | prim);
+   ilo_cp_write(cp, info->count);
+   ilo_cp_write(cp, vb_start);
+   ilo_cp_write(cp, info->instance_count);
+   ilo_cp_write(cp, info->start_instance);
+   ilo_cp_write(cp, info->index_bias);
+   ilo_cp_end(cp);
+}
+/**
+ * Fill an SF_CLIP_VIEWPORT array of num_viewports elements, 16 dwords each,
+ * in space obtained from ilo_cp_steal_ptr().
+ *
+ * Each element holds six viewport matrix values and four guardband limits
+ * from the matching ilo_viewport_cso, converted with fui(); the remaining
+ * dwords are zero.  num_viewports must be between 1 and 16.
+ *
+ * Returns the state offset reported by ilo_cp_steal_ptr().
+ */
+static inline uint32_t
+gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+                           const struct ilo_viewport_cso *viewports,
+                           unsigned num_viewports,
+                           struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4; /* presumably the 64-byte alignment expressed in dwords -- confirm against ilo_cp_steal_ptr() */
+   const int state_len = 16 * num_viewports;
+   uint32_t state_offset, *dw;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
+    *
+    *     "The viewport-specific state used by both the SF and CL units
+    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
+    *      of which contains the DWords described below. The start of each
+    *      element is spaced 16 DWords apart. The location of first element of
+    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
+    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = &viewports[i];
+
+      dw[0] = fui(vp->m00);
+      dw[1] = fui(vp->m11);
+      dw[2] = fui(vp->m22);
+      dw[3] = fui(vp->m30);
+      dw[4] = fui(vp->m31);
+      dw[5] = fui(vp->m32);
+      dw[6] = 0;
+      dw[7] = 0;
+      dw[8] = fui(vp->min_gbx);
+      dw[9] = fui(vp->max_gbx);
+      dw[10] = fui(vp->min_gby);
+      dw[11] = fui(vp->max_gby);
+      dw[12] = 0;
+      dw[13] = 0;
+      dw[14] = 0;
+      dw[15] = 0;
+
+      dw += 16; /* advance to the next 16-dword element */
+   }
+
+   return state_offset;
+}
+
 #endif /* ILO_GPE_GEN7_H */
-- 
2.30.2