From: Chia-I Wu Date: Fri, 12 Jun 2015 06:56:56 +0000 (+0800) Subject: ilo: add ilo_state_ps X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6dad848d1acfe781c735120c3db97f1a2f0c28fa;p=mesa.git ilo: add ilo_state_ps We want to make ilo_shader_cso a union of ilo_state_{vs,hs,ds,gs,ps}. --- diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 2a630779848..3b38277af19 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -32,6 +32,7 @@ C_SOURCES := \ core/ilo_state_sbe.c \ core/ilo_state_sbe.h \ core/ilo_state_shader.c \ + core/ilo_state_shader_ps.c \ core/ilo_state_shader.h \ core/ilo_state_sol.c \ core/ilo_state_sol.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h index c1dbf23dfb3..44690c5b0bb 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.h +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h @@ -123,6 +123,54 @@ struct ilo_state_gs_info { bool stats_enable; }; +struct ilo_state_ps_io_info { + /* inputs */ + enum gen_position_offset posoffset; + uint8_t attr_count; + bool use_z; + bool use_w; + bool use_coverage_mask; + + /* outputs */ + enum gen_pscdepth_mode pscdepth; + bool has_rt_write; + bool write_pixel_mask; + bool write_omask; +}; + +struct ilo_state_ps_params_info { + /* compatibility with raster states */ + uint32_t sample_mask; + bool earlyz_control_psexec; + + /* compatibility with cc states */ + bool alpha_may_kill; + bool dual_source_blending; + bool has_writeable_rt; +}; + +struct ilo_state_ps_info { + struct ilo_state_shader_kernel_info kernel_8; + struct ilo_state_shader_kernel_info kernel_16; + struct ilo_state_shader_kernel_info kernel_32; + struct ilo_state_shader_resource_info resource; + + struct ilo_state_ps_io_info io; + struct ilo_state_ps_params_info params; + + /* bitmask of GEN6_PS_DISPATCH_x */ + uint8_t valid_kernels; + bool per_sample_dispatch; + bool sample_count_one; + bool cv_per_sample_interp; + bool cv_has_earlyz_op; + + bool rt_clear_enable; + bool rt_resolve_enable; + + bool cv_has_depth_buffer; +}; + struct ilo_state_vs { uint32_t vs[5]; }; @@ -140,6 +188,20 @@ struct ilo_state_gs { uint32_t gs[5]; }; +struct ilo_state_ps { + uint32_t ps[8]; + + struct ilo_state_ps_dispatch_conds { + bool ps_valid; + + bool has_rt_write; + bool write_odepth; + bool write_ostencil; + bool has_uav_write; + bool ps_may_kill; + } conds; +}; + bool ilo_state_vs_init(struct ilo_state_vs *vs, const struct ilo_dev *dev, @@ -177,4 +239,18 @@ bool ilo_state_gs_init_disabled(struct ilo_state_gs *gs, const struct ilo_dev *dev); +bool +ilo_state_ps_init(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info); + +bool +ilo_state_ps_init_disabled(struct ilo_state_ps *ps, + const struct ilo_dev *dev); + +bool +ilo_state_ps_set_params(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params); + #endif /* ILO_STATE_SHADER_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c new file mode 100644 index 00000000000..f4d801e9b56 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c @@ -0,0 +1,771 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_shader.h" + +struct pixel_ff { + uint8_t dispatch_modes; + + uint32_t kernel_offsets[3]; + uint8_t grf_starts[3]; + bool pcb_enable; + uint8_t scratch_space; + + uint8_t sampler_count; + uint8_t surface_count; + bool has_uav; + + uint16_t thread_count; + + struct ilo_state_ps_dispatch_conds conds; + + bool kill_pixel; + bool dispatch_enable; + bool dual_source_blending; + uint32_t sample_mask; +}; + +static bool +ps_kernel_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_shader_kernel_info *kernel) +{ + /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */ + const uint8_t max_grf_start = 128; + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 271: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" + */ + const uint32_t max_scratch_size = 2 * 1024 * 1024; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* "Kernel Start Pointer" is 64-byte aligned */ + assert(kernel->offset % 64 == 0); + + assert(kernel->grf_start < max_grf_start); + assert(kernel->scratch_size <= max_scratch_size); + + return true; +} + +static bool +ps_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; + const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; + const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; + const struct ilo_state_ps_io_info *io = &info->io; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!ps_kernel_validate_gen6(dev, kernel_8) || + !ps_kernel_validate_gen6(dev, kernel_16) || + !ps_kernel_validate_gen6(dev, kernel_32)) + return false; + + /* unsupported on Gen6 */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) + assert(!io->use_coverage_mask); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth + * field must be set to disabled." + */ + if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF) + assert(info->cv_has_depth_buffer); + + if (!info->per_sample_dispatch) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 281: + * + * "MSDISPMODE_PERSAMPLE is required in order to select + * POSOFFSET_SAMPLE." + */ + assert(io->posoffset != GEN6_POSOFFSET_SAMPLE); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 282: + * + * "MSDISPMODE_PERSAMPLE is required in order to select + * INTERP_SAMPLE." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 283: + * + * "MSDISPMODE_PERSAMPLE is required in order to select Perspective + * Sample or Non-perspective Sample barycentric coordinates." + */ + assert(!info->cv_per_sample_interp); + } + + /* + * + * From the Sandy Bridge PRM, volume 2 part 1, page 314: + * + * "Pixel Shader Dispatch, Alpha... must all be disabled." + * + * Simply disallow any valid kernel when there is early-z op. Also, when + * there is no valid kernel, io should be zeroed. + */ + if (info->valid_kernels) + assert(!info->cv_has_earlyz_op); + else + assert(ilo_is_zeroed(io, sizeof(*io))); + + return true; +} + +static uint8_t +ps_get_gen6_dispatch_modes(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint8_t dispatch_modes = info->valid_kernels; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!dispatch_modes) + return 0; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 334: + * + * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader + * computed depth." + * + * "Valid on all products, except when in non-1x PERSAMPLE mode + * (applies to [DevSNB+] only)" + * + * From the Sandy Bridge PRM, volume 4 part 1, page 239: + * + * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode + * is PERPIXEL, Message Type for Render Target Write must be SIMD8. + * + * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message + * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)." + * + * It is really hard to follow what combinations are valid on what + * platforms. Judging from the restrictions on RT write messages on Gen6, + * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE + * issue should be universal, and disallows multiple dispatch modes. + */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) { + if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch) + dispatch_modes &= GEN6_PS_DISPATCH_8; + if (io->write_omask) + dispatch_modes &= ~GEN6_PS_DISPATCH_8; + } + if (info->per_sample_dispatch && !info->sample_count_one) { + /* prefer 32 over 16 over 8 */ + if (dispatch_modes & GEN6_PS_DISPATCH_32) + dispatch_modes &= GEN6_PS_DISPATCH_32; + else if (dispatch_modes & GEN6_PS_DISPATCH_16) + dispatch_modes &= GEN6_PS_DISPATCH_16; + else + dispatch_modes &= GEN6_PS_DISPATCH_8; + } + + /* + * From the Broadwell PRM, volume 2b, page 149: + * + * "When Render Target Fast Clear Enable is ENABLED or Render Target + * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel + * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED." + */ + if (info->rt_clear_enable || info->rt_resolve_enable) + dispatch_modes &= ~GEN6_PS_DISPATCH_8; + + assert(dispatch_modes); + + return dispatch_modes; +} + +static uint16_t +ps_get_gen6_thread_count(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + uint16_t thread_count; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* Maximum Number of Threads of 3DSTATE_PS */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + /* scaled automatically */ + thread_count = 64 - 1; + break; + case ILO_GEN(7.5): + thread_count = (dev->gt == 3) ? 408 : + (dev->gt == 2) ? 204 : 102; + break; + case ILO_GEN(7): + thread_count = (dev->gt == 2) ? 172 : 48; + break; + case ILO_GEN(6): + default: + /* from the classic driver instead of the PRM */ + thread_count = (dev->gt == 2) ? 80 : 40; + break; + } + + return thread_count - 1; +} + +static bool +ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params, + const struct ilo_state_ps_dispatch_conds *conds) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the + * PS kernel or color calculator has the ability to kill (discard) + * pixels or samples, other than due to depth or stencil testing. + * This bit is required to be ENABLED in the following situations: + * + * The API pixel shader program contains "killpix" or "discard" + * instructions, or other code in the pixel shader kernel that can + * cause the final pixel mask to differ from the pixel mask received + * on dispatch. + * + * A sampler with chroma key enabled with kill pixel mode is used by + * the pixel shader. + * + * Any render target has Alpha Test Enable or AlphaToCoverage Enable + * enabled. + * + * The pixel shader kernel generates and outputs oMask. + * + * Note: As ClipDistance clipping is fully supported in hardware and + * therefore not via PS instructions, there should be no need to + * ENABLE this bit due to ClipDistance clipping." + */ + return (conds->ps_may_kill || params->alpha_may_kill); +} + +static bool +ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params, + const struct ilo_state_ps_dispatch_conds *conds) +{ + /* + * We want to skip dispatching when EarlyZ suffices. The conditions that + * require dispatching are + * + * - PS writes RTs and RTs are writeable + * - PS changes depth value and depth test/write is enabled + * - PS changes stencil value and stencil test is enabled + * - PS writes UAVs + * - PS or CC kills pixels + * - EDSC is PSEXEC, and depth test/write or stencil test is enabled + */ + bool dispatch_required = + ((conds->has_rt_write && params->has_writeable_rt) || + conds->write_odepth || + conds->write_ostencil || + conds->has_uav_write || + ps_params_get_gen6_kill_pixel(dev, params, conds) || + params->earlyz_control_psexec); + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 280: + * + * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be + * set." + */ + if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec) + dispatch_required = true; + + /* assert it is valid to dispatch */ + if (dispatch_required) + assert(conds->ps_valid); + + return dispatch_required; +} + +static bool +ps_get_gen6_ff_kernels(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + struct pixel_ff *ff) +{ + const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; + const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; + const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; + uint32_t scratch_size; + + ILO_DEV_ASSERT(dev, 6, 8); + + ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info); + + /* initialize kernel offsets and GRF starts */ + if (util_is_power_of_two(ff->dispatch_modes)) { + if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) { + ff->kernel_offsets[0] = kernel_8->offset; + ff->grf_starts[0] = kernel_8->grf_start; + } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) { + ff->kernel_offsets[0] = kernel_16->offset; + ff->grf_starts[0] = kernel_16->grf_start; + } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) { + ff->kernel_offsets[0] = kernel_32->offset; + ff->grf_starts[0] = kernel_32->grf_start; + } + } else { + ff->kernel_offsets[0] = kernel_8->offset; + ff->kernel_offsets[1] = kernel_32->offset; + ff->kernel_offsets[2] = kernel_16->offset; + + ff->grf_starts[0] = kernel_8->grf_start; + ff->grf_starts[1] = kernel_32->grf_start; + ff->grf_starts[2] = kernel_16->grf_start; + } + + /* we do not want to save it */ + assert(ff->kernel_offsets[0] == 0); + + ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && + kernel_8->pcb_attr_count) || + ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && + kernel_16->pcb_attr_count) || + ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && + kernel_32->pcb_attr_count)); + + scratch_size = 0; + if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && + scratch_size < kernel_8->scratch_size) + scratch_size = kernel_8->scratch_size; + if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && + scratch_size < kernel_16->scratch_size) + scratch_size = kernel_16->scratch_size; + if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && + scratch_size < kernel_32->scratch_size) + scratch_size = kernel_32->scratch_size; + + /* next power of two, starting from 1KB */ + ff->scratch_space = (scratch_size > 1024) ? + (util_last_bit(scratch_size - 1) - 10): 0; + + /* GPU hangs on Haswell if none of the dispatch mode bits is set */ + if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes) + ff->dispatch_modes |= GEN6_PS_DISPATCH_8; + + return true; +} + +static bool +ps_get_gen6_ff(const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + struct pixel_ff *ff) +{ + const struct ilo_state_shader_resource_info *resource = &info->resource; + const struct ilo_state_ps_io_info *io = &info->io; + const struct ilo_state_ps_params_info *params = &info->params; + + ILO_DEV_ASSERT(dev, 6, 8); + + memset(ff, 0, sizeof(*ff)); + + if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff)) + return false; + + ff->sampler_count = (resource->sampler_count <= 12) ? + (resource->sampler_count + 3) / 4 : 4; + ff->surface_count = resource->surface_count; + ff->has_uav = resource->has_uav; + + ff->thread_count = ps_get_gen6_thread_count(dev, info); + + ff->conds.ps_valid = (info->valid_kernels != 0x0); + ff->conds.has_rt_write = io->has_rt_write; + ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF); + ff->conds.write_ostencil = false; + ff->conds.has_uav_write = resource->has_uav; + ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask); + + ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds); + ff->dispatch_enable = + ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds); + ff->dual_source_blending = params->dual_source_blending; + ff->sample_mask = params->sample_mask; + + return true; +} + +static bool +ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw2, dw3, dw4, dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw2 |= GEN6_THREADDISP_FP_MODE_ALT; + + dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT | + ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT | + ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT; + + dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT | + ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; + + if (ff->kill_pixel) + dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; + + if (io->pscdepth != GEN7_PSCDEPTH_OFF) + dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; + if (io->use_z) + dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; + + if (ff->dispatch_enable) + dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; + + if (io->write_omask) + dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK; + if (io->use_w) + dw5 |= GEN6_WM_DW5_PS_USE_W; + + if (ff->dual_source_blending) + dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; + + dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | + io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; + + dw6 |= (info->per_sample_dispatch) ? + GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7); + ps->ps[0] = dw2; + ps->ps[1] = dw3; + ps->ps[2] = dw4; + ps->ps[3] = dw5; + ps->ps[4] = dw6; + ps->ps[5] = ff->kernel_offsets[1]; + ps->ps[6] = ff->kernel_offsets[2]; + + return true; +} + +static bool +ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT; + + if (ff->dispatch_enable) + dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; + if (ff->kill_pixel) + dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL; + + if (io->use_z) + dw1 |= GEN7_WM_DW1_PS_USE_DEPTH; + if (io->use_w) + dw1 |= GEN7_WM_DW1_PS_USE_W; + if (io->use_coverage_mask) + dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK; + + dw2 = (info->per_sample_dispatch) ? + GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2); + ps->ps[0] = dw1; + ps->ps[1] = dw2; + + return true; +} + +static bool +ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw2, dw3, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw2 |= GEN6_THREADDISP_FP_MODE_ALT; + + dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | + ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; + + if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { + dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT | + (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; + } else { + dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT; + } + + if (ff->pcb_enable) + dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; + if (io->attr_count) + dw4 |= GEN7_PS_DW4_ATTR_ENABLE; + if (io->write_omask) + dw4 |= GEN7_PS_DW4_COMPUTE_OMASK; + if (info->rt_clear_enable) + dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR; + if (ff->dual_source_blending) + dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + if (info->rt_resolve_enable) + dw4 |= GEN7_PS_DW4_RT_RESOLVE; + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav) + dw4 |= GEN75_PS_DW4_ACCESS_UAV; + + dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT | + ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT | + ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8); + ps->ps[2] = dw2; + ps->ps[3] = dw3; + ps->ps[4] = dw4; + ps->ps[5] = dw5; + ps->ps[6] = ff->kernel_offsets[1]; + ps->ps[7] = ff->kernel_offsets[2]; + + return true; +} + +static bool +ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw3, dw4, dw6, dw7; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw3 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | + ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; + + if (false) + dw3 |= GEN6_THREADDISP_FP_MODE_ALT; + + dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + + dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT | + io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT | + ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; + + if (ff->pcb_enable) + dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; + + if (info->rt_clear_enable) + dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR; + if (info->rt_resolve_enable) + dw6 |= GEN8_PS_DW6_RT_RESOLVE; + + dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT | + ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT | + ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6); + ps->ps[0] = dw3; + ps->ps[1] = dw4; + ps->ps[2] = dw6; + ps->ps[3] = dw7; + ps->ps[4] = ff->kernel_offsets[1]; + ps->ps[5] = ff->kernel_offsets[2]; + + return true; +} + +static bool +ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info, + const struct pixel_ff *ff) +{ + const struct ilo_state_ps_io_info *io = &info->io; + uint32_t dw1; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT; + + if (info->valid_kernels) + dw1 |= GEN8_PSX_DW1_VALID; + if (!io->has_rt_write) + dw1 |= GEN8_PSX_DW1_UAV_ONLY; + if (io->write_omask) + dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK; + if (io->write_pixel_mask) + dw1 |= GEN8_PSX_DW1_KILL_PIXEL; + + if (io->use_z) + dw1 |= GEN8_PSX_DW1_USE_DEPTH; + if (io->use_w) + dw1 |= GEN8_PSX_DW1_USE_W; + if (io->attr_count) + dw1 |= GEN8_PSX_DW1_ATTR_ENABLE; + + if (info->per_sample_dispatch) + dw1 |= GEN8_PSX_DW1_PER_SAMPLE; + if (ff->has_uav) + dw1 |= GEN8_PSX_DW1_ACCESS_UAV; + if (io->use_coverage_mask) + dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK; + + /* + * From the Broadwell PRM, volume 2b, page 151: + * + * "When this bit (Pixel Shader Valid) clear the rest of this command + * should also be clear. + */ + if (!info->valid_kernels) + dw1 = 0; + + STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5); + ps->ps[4] = dw1; + + return true; +} + +bool +ilo_state_ps_init(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_info *info) +{ + struct pixel_ff ff; + bool ret = true; + + assert(ilo_is_zeroed(ps, sizeof(*ps))); + + ret &= ps_get_gen6_ff(dev, info, &ff); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff); + ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff); + } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff); + ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff); + } else { + ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); + } + + /* save conditions */ + ps->conds = ff.conds; + + assert(ret); + + return ret; +} + +bool +ilo_state_ps_init_disabled(struct ilo_state_ps *ps, + const struct ilo_dev *dev) +{ + struct ilo_state_ps_info info; + + memset(&info, 0, sizeof(info)); + + return ilo_state_ps_init(ps, dev, &info); +} + +bool +ilo_state_ps_set_params(struct ilo_state_ps *ps, + const struct ilo_dev *dev, + const struct ilo_state_ps_params_info *params) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* modify sample mask */ + if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { + ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) | + (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; + } + + /* modify dispatch enable, pixel kill, and dual source blending */ + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) + ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; + else + ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE; + + if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) + ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL; + else + ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL; + + if (params->dual_source_blending) + ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + else + ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND; + } else { + if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) + ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; + else + ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE; + + if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) + ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL; + else + ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL; + + if (params->dual_source_blending) + ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; + else + ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; + } + } + + return true; +}