radeonsi: set up a ring buffer for tessellation factors
[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Christian König <christian.koenig@amd.com>
25 * Marek Olšák <maraeo@gmail.com>
26 */
27
28 #include "si_pipe.h"
29 #include "si_shader.h"
30 #include "sid.h"
31
32 #include "tgsi/tgsi_parse.h"
33 #include "util/u_memory.h"
34 #include "util/u_simple_shaders.h"
35
/* Translate the TES declaration properties (primitive mode, spacing,
 * winding order, point mode) into the VGT_TF_PARAM register value that
 * configures the fixed-function tessellator for this shader. */
static void si_set_tesseval_regs(struct si_shader *shader,
				 struct si_pm4_state *pm4)
{
	struct tgsi_shader_info *info = &shader->selector->info;
	unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
	unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
	bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
	bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
	unsigned type, partitioning, topology;

	/* Tessellation domain: isolines, triangles, or quads. */
	switch (tes_prim_mode) {
	case PIPE_PRIM_LINES:
		type = V_028B6C_TESS_ISOLINE;
		break;
	case PIPE_PRIM_TRIANGLES:
		type = V_028B6C_TESS_TRIANGLE;
		break;
	case PIPE_PRIM_QUADS:
		type = V_028B6C_TESS_QUAD;
		break;
	default:
		assert(0);
		return;
	}

	/* Edge spacing: fractional-odd, fractional-even, or integer ("equal"). */
	switch (tes_spacing) {
	case PIPE_TESS_SPACING_FRACTIONAL_ODD:
		partitioning = V_028B6C_PART_FRAC_ODD;
		break;
	case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
		partitioning = V_028B6C_PART_FRAC_EVEN;
		break;
	case PIPE_TESS_SPACING_EQUAL:
		partitioning = V_028B6C_PART_INTEGER;
		break;
	default:
		assert(0);
		return;
	}

	/* Output topology: point mode and isolines override winding order. */
	if (tes_point_mode)
		topology = V_028B6C_OUTPUT_POINT;
	else if (tes_prim_mode == PIPE_PRIM_LINES)
		topology = V_028B6C_OUTPUT_LINE;
	else if (tes_vertex_order_cw)
		/* for some reason, this must be the other way around */
		topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
	else
		topology = V_028B6C_OUTPUT_TRIANGLE_CW;

	si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
		       S_028B6C_TYPE(type) |
		       S_028B6C_PARTITIONING(partitioning) |
		       S_028B6C_TOPOLOGY(topology));
}
91
/* Build PM4 state for a vertex shader running as the hw LS stage (used
 * when tessellation is enabled).  Only the program address is written
 * here; ls_rsrc1/2 are stashed on the shader for later emission —
 * NOTE(review): presumably because LS/HS resource registers must be
 * programmed together at draw time; confirm against the draw code. */
static void si_shader_ls(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned vgpr_comp_cnt;
	uint64_t va;

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	/* We need at least 2 components for LS.
	 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
	vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;

	num_user_sgprs = SI_LS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2;
	}
	assert(num_sgprs <= 104);

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
	si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);

	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
			   S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
	shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs);
}
126
/* Build PM4 state for a tessellation control shader (hw HS stage). */
static void si_shader_hs(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	uint64_t va;

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	num_user_sgprs = SI_TCS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* One SGPR after user SGPRs is pre-loaded with tessellation factor
	 * buffer offset. */
	if ((num_user_sgprs + 1) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 1 + 2;
	}
	assert(num_sgprs <= 104);

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
	si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
		       S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B428_SGPRS((num_sgprs - 1) / 8));
	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
		       S_00B42C_USER_SGPR(num_user_sgprs));
}
158
/* Build PM4 state for a VS or TES running as the hw ES stage (i.e. when
 * it feeds a geometry shader through the ESGS ring). */
static void si_shader_es(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned vgpr_comp_cnt;
	uint64_t va;

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	/* VGPR_COMP_CNT selects how many input VGPRs the hw pre-loads. */
	if (shader->selector->type == PIPE_SHADER_VERTEX) {
		vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
		num_user_sgprs = SI_VS_NUM_USER_SGPR;
	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
		vgpr_comp_cnt = 3; /* all components are needed for TES */
		num_user_sgprs = SI_TES_NUM_USER_SGPR;
	} else
		assert(0);

	num_sgprs = shader->num_sgprs;
	/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
	if ((num_user_sgprs + 1) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 1 + 2;
	}
	assert(num_sgprs <= 104);

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
		       S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
		       S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
		       S_00B32C_USER_SGPR(num_user_sgprs) |
		       S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));

	/* A TES also programs the tessellator output configuration. */
	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
		si_set_tesseval_regs(shader, pm4);
}
205
/* Build PM4 state for a geometry shader (hw GS stage): GS mode/cut mode,
 * ESGS/GSVS ring item sizes, instancing, and the program registers. */
static void si_shader_gs(struct si_shader *shader)
{
	/* Each output is a vec4 = 16 bytes = 4 dwords per emitted vertex. */
	unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
	unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
	unsigned gs_num_invocations = shader->selector->gs_num_invocations;
	unsigned cut_mode;
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	uint64_t va;

	/* The GSVS_RING_ITEMSIZE register takes 15 bits */
	assert(gsvs_itemsize < (1 << 15));

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	/* Pick the smallest cut mode that covers the max vertex count. */
	if (gs_max_vert_out <= 128) {
		cut_mode = V_028A40_GS_CUT_128;
	} else if (gs_max_vert_out <= 256) {
		cut_mode = V_028A40_GS_CUT_256;
	} else if (gs_max_vert_out <= 512) {
		cut_mode = V_028A40_GS_CUT_512;
	} else {
		assert(gs_max_vert_out <= 1024);
		cut_mode = V_028A40_GS_CUT_1024;
	}

	si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
		       S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
		       S_028A40_CUT_MODE(cut_mode)|
		       S_028A40_ES_WRITE_OPTIMIZE(1) |
		       S_028A40_GS_WRITE_OPTIMIZE(1));

	si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
	si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);

	/* ESGS item size: one vec4 (4 dwords) per input actually read. */
	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
		       util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);

	si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);

	si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);

	/* GS instancing; the CNT field is capped at 127 by the register. */
	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
		       S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
		       S_028B90_ENABLE(gs_num_invocations > 0));

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
	si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);

	num_user_sgprs = SI_GS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
	if ((num_user_sgprs + 2) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2 + 2;
	}
	assert(num_sgprs <= 104);

	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
		       S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
		       S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
		       S_00B22C_USER_SGPR(num_user_sgprs) |
		       S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
}
280
/* Build PM4 state for a shader running as the hw VS stage (the stage that
 * feeds the rasterizer): a real VS, a TES without GS, or the GS copy
 * shader.  Programs export formats, streamout enables, and PA_CL_VTE_CNTL. */
static void si_shader_vs(struct si_shader *shader)
{
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned nparams, vgpr_comp_cnt;
	uint64_t va;
	unsigned window_space =
	   shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];

	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	va = shader->bo->gpu_address;
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

	/* VGPR_COMP_CNT selects how many input VGPRs the hw pre-loads. */
	if (shader->is_gs_copy_shader) {
		vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
		num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
	} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
		vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
		num_user_sgprs = SI_VS_NUM_USER_SGPR;
	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
		vgpr_comp_cnt = 3; /* all components are needed for TES */
		num_user_sgprs = SI_TES_NUM_USER_SGPR;
	} else
		assert(0);

	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2;
	}
	assert(num_sgprs <= 104);

	/* VS is required to export at least one param. */
	nparams = MAX2(shader->nr_param_exports, 1);
	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));

	/* Enable a 4-component format for each position export in use. */
	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
		       S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE));

	/* Program address is 256-byte aligned, split across LO/HI registers. */
	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
	/* VGPRs are allocated in units of 4, SGPRs in units of 8. */
	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
		       S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
	/* Streamout buffer enables come from the selector's SO declaration. */
	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
		       S_00B12C_USER_SGPR(num_user_sgprs) |
		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
		       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
		       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
		       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
		       S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
		       S_00B12C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
	/* Window-space positions skip the viewport transform. */
	if (window_space)
		si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
			       S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
	else
		si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
			       S_028818_VTX_W0_FMT(1) |
			       S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
			       S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));

	/* A TES also programs the tessellator output configuration. */
	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
		si_set_tesseval_regs(shader, pm4);
}
362
363 static void si_shader_ps(struct si_shader *shader)
364 {
365 struct tgsi_shader_info *info = &shader->selector->info;
366 struct si_pm4_state *pm4;
367 unsigned i, spi_ps_in_control;
368 unsigned num_sgprs, num_user_sgprs;
369 unsigned spi_baryc_cntl = 0, spi_ps_input_ena;
370 uint64_t va;
371
372 pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
373
374 if (pm4 == NULL)
375 return;
376
377 for (i = 0; i < info->num_inputs; i++) {
378 switch (info->input_semantic_name[i]) {
379 case TGSI_SEMANTIC_POSITION:
380 /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
381 * Possible vaules:
382 * 0 -> Position = pixel center (default)
383 * 1 -> Position = pixel centroid
384 * 2 -> Position = at sample position
385 */
386 switch (info->input_interpolate_loc[i]) {
387 case TGSI_INTERPOLATE_LOC_CENTROID:
388 spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(1);
389 break;
390 case TGSI_INTERPOLATE_LOC_SAMPLE:
391 spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
392 break;
393 }
394
395 if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
396 TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
397 spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
398 break;
399 }
400 }
401
402 spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
403 S_0286D8_BC_OPTIMIZE_DISABLE(1);
404
405 si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
406 spi_ps_input_ena = shader->spi_ps_input_ena;
407 /* we need to enable at least one of them, otherwise we hang the GPU */
408 assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) ||
409 G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) ||
410 G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) ||
411 G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) ||
412 G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) ||
413 G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) ||
414 G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) ||
415 G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena));
416
417 si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
418 si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
419 si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
420
421 si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
422 si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
423 shader->spi_shader_col_format);
424 si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
425
426 va = shader->bo->gpu_address;
427 si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
428 si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
429 si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
430
431 num_user_sgprs = SI_PS_NUM_USER_SGPR;
432 num_sgprs = shader->num_sgprs;
433 /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
434 if ((num_user_sgprs + 1) > num_sgprs) {
435 /* Last 2 reserved SGPRs are used for VCC */
436 num_sgprs = num_user_sgprs + 1 + 2;
437 }
438 assert(num_sgprs <= 104);
439
440 si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
441 S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
442 S_00B028_SGPRS((num_sgprs - 1) / 8) |
443 S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
444 si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
445 S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
446 S_00B02C_USER_SGPR(num_user_sgprs) |
447 S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
448 }
449
/* (Re)build the hw-stage PM4 state for a shader variant.  The hw stage a
 * VS/TES runs as depends on the variant key: as_ls when tessellation is
 * active, as_es when a GS is active, otherwise the hw VS stage. */
static void si_shader_init_pm4_state(struct si_shader *shader)
{

	if (shader->pm4)
		si_pm4_free_state_simple(shader->pm4);

	switch (shader->selector->type) {
	case PIPE_SHADER_VERTEX:
		if (shader->key.vs.as_ls)
			si_shader_ls(shader);
		else if (shader->key.vs.as_es)
			si_shader_es(shader);
		else
			si_shader_vs(shader);
		break;
	case PIPE_SHADER_TESS_CTRL:
		si_shader_hs(shader);
		break;
	case PIPE_SHADER_TESS_EVAL:
		if (shader->key.tes.as_es)
			si_shader_es(shader);
		else
			si_shader_vs(shader);
		break;
	case PIPE_SHADER_GEOMETRY:
		/* A GS needs both its own state and the hw-VS state of the
		 * GS copy shader that feeds the fixed-function pipeline. */
		si_shader_gs(shader);
		si_shader_vs(shader->gs_copy_shader);
		break;
	case PIPE_SHADER_FRAGMENT:
		si_shader_ps(shader);
		break;
	default:
		assert(0);
	}
}
485
/* Compute the key for the hw shader variant: everything about the current
 * context state that requires a different compiled variant of \p sel
 * (hw stage selection, vertex instance divisors, PS raster state, etc.). */
static inline void si_shader_selector_key(struct pipe_context *ctx,
					  struct si_shader_selector *sel,
					  union si_shader_key *key)
{
	struct si_context *sctx = (struct si_context *)ctx;
	unsigned i;

	/* The key is compared with memcmp, so it must be fully zeroed first. */
	memset(key, 0, sizeof(*key));

	switch (sel->type) {
	case PIPE_SHADER_VERTEX:
		if (sctx->vertex_elements)
			for (i = 0; i < sctx->vertex_elements->count; ++i)
				key->vs.instance_divisors[i] =
					sctx->vertex_elements->elements[i].instance_divisor;

		/* Tessellation bound -> VS runs as LS; GS bound -> as ES. */
		if (sctx->tes_shader)
			key->vs.as_ls = 1;
		else if (sctx->gs_shader) {
			key->vs.as_es = 1;
			key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
		}
		break;
	case PIPE_SHADER_TESS_CTRL:
		/* TCS output layout depends on the TES domain. */
		key->tcs.prim_mode =
			sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
		break;
	case PIPE_SHADER_TESS_EVAL:
		if (sctx->gs_shader) {
			key->tes.as_es = 1;
			key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
		}
		break;
	case PIPE_SHADER_GEOMETRY:
		break;
	case PIPE_SHADER_FRAGMENT: {
		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

		if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
			key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
		key->ps.export_16bpc = sctx->framebuffer.export_16bpc;

		if (rs) {
			/* Classify the current rasterized primitive type. */
			bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
					sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
				       sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
			bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;

			key->ps.color_two_side = rs->two_side;

			if (sctx->queued.named.blend) {
				key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
						       rs->multisample_enable &&
						       !sctx->framebuffer.cb0_is_integer;
			}

			key->ps.poly_stipple = rs->poly_stipple_enable && is_poly;
			/* Smoothing is only done without real MSAA. */
			key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
						       (is_line && rs->line_smooth)) &&
						      sctx->framebuffer.nr_samples <= 1;
		}

		key->ps.alpha_func = PIPE_FUNC_ALWAYS;
		/* Alpha-test should be disabled if colorbuffer 0 is integer. */
		if (sctx->queued.named.dsa &&
		    !sctx->framebuffer.cb0_is_integer)
			key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
		break;
	}
	default:
		assert(0);
	}
}
560
561 /* Select the hw shader variant depending on the current state. */
562 static int si_shader_select(struct pipe_context *ctx,
563 struct si_shader_selector *sel)
564 {
565 struct si_context *sctx = (struct si_context *)ctx;
566 union si_shader_key key;
567 struct si_shader * shader = NULL;
568 int r;
569
570 si_shader_selector_key(ctx, sel, &key);
571
572 /* Check if we don't need to change anything.
573 * This path is also used for most shaders that don't need multiple
574 * variants, it will cost just a computation of the key and this
575 * test. */
576 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
577 return 0;
578 }
579
580 /* lookup if we have other variants in the list */
581 if (sel->num_shaders > 1) {
582 struct si_shader *p = sel->current, *c = p->next_variant;
583
584 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
585 p = c;
586 c = c->next_variant;
587 }
588
589 if (c) {
590 p->next_variant = c->next_variant;
591 shader = c;
592 }
593 }
594
595 if (shader) {
596 shader->next_variant = sel->current;
597 sel->current = shader;
598 } else {
599 shader = CALLOC(1, sizeof(struct si_shader));
600 shader->selector = sel;
601 shader->key = key;
602
603 shader->next_variant = sel->current;
604 sel->current = shader;
605 r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
606 shader);
607 if (unlikely(r)) {
608 R600_ERR("Failed to build shader variant (type=%u) %d\n",
609 sel->type, r);
610 sel->current = NULL;
611 FREE(shader);
612 return r;
613 }
614 si_shader_init_pm4_state(shader);
615 sel->num_shaders++;
616 }
617
618 return 0;
619 }
620
621 static void *si_create_shader_state(struct pipe_context *ctx,
622 const struct pipe_shader_state *state,
623 unsigned pipe_shader_type)
624 {
625 struct si_screen *sscreen = (struct si_screen *)ctx->screen;
626 struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
627 int i;
628
629 sel->type = pipe_shader_type;
630 sel->tokens = tgsi_dup_tokens(state->tokens);
631 sel->so = state->stream_output;
632 tgsi_scan_shader(state->tokens, &sel->info);
633
634 switch (pipe_shader_type) {
635 case PIPE_SHADER_GEOMETRY:
636 sel->gs_output_prim =
637 sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
638 sel->gs_max_out_vertices =
639 sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
640 sel->gs_num_invocations =
641 sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
642
643 for (i = 0; i < sel->info.num_inputs; i++) {
644 unsigned name = sel->info.input_semantic_name[i];
645 unsigned index = sel->info.input_semantic_index[i];
646
647 switch (name) {
648 case TGSI_SEMANTIC_PRIMID:
649 break;
650 default:
651 sel->inputs_read |=
652 1llu << si_shader_io_get_unique_index(name, index);
653 }
654 }
655 break;
656
657 case PIPE_SHADER_VERTEX:
658 case PIPE_SHADER_TESS_CTRL:
659 for (i = 0; i < sel->info.num_outputs; i++) {
660 unsigned name = sel->info.output_semantic_name[i];
661 unsigned index = sel->info.output_semantic_index[i];
662
663 switch (name) {
664 case TGSI_SEMANTIC_TESSINNER:
665 case TGSI_SEMANTIC_TESSOUTER:
666 case TGSI_SEMANTIC_PATCH:
667 sel->patch_outputs_written |=
668 1llu << si_shader_io_get_unique_index(name, index);
669 break;
670 default:
671 sel->outputs_written |=
672 1llu << si_shader_io_get_unique_index(name, index);
673 }
674 }
675 break;
676 }
677
678 if (sscreen->b.debug_flags & DBG_PRECOMPILE)
679 si_shader_select(ctx, sel);
680
681 return sel;
682 }
683
/* Create a fragment shader selector (variants are compiled on demand). */
static void *si_create_fs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
}
689
/* Create a geometry shader selector (variants are compiled on demand). */
static void *si_create_gs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
}
695
/* Create a vertex shader selector (variants are compiled on demand). */
static void *si_create_vs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
701
/* Create a tessellation control shader selector (variants on demand). */
static void *si_create_tcs_state(struct pipe_context *ctx,
				 const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL);
}
707
/* Create a tessellation evaluation shader selector (variants on demand). */
static void *si_create_tes_state(struct pipe_context *ctx,
				 const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
}
713
714 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
715 {
716 struct si_context *sctx = (struct si_context *)ctx;
717 struct si_shader_selector *sel = state;
718
719 if (sctx->vs_shader == sel || !sel)
720 return;
721
722 sctx->vs_shader = sel;
723 sctx->clip_regs.dirty = true;
724 }
725
726 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
727 {
728 struct si_context *sctx = (struct si_context *)ctx;
729 struct si_shader_selector *sel = state;
730 bool enable_changed = !!sctx->gs_shader != !!sel;
731
732 if (sctx->gs_shader == sel)
733 return;
734
735 sctx->gs_shader = sel;
736 sctx->clip_regs.dirty = true;
737 sctx->last_rast_prim = -1; /* reset this so that it gets updated */
738
739 if (enable_changed)
740 si_shader_change_notify(sctx);
741 }
742
743 static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
744 {
745 struct si_context *sctx = (struct si_context *)ctx;
746 struct si_shader_selector *sel = state;
747
748 if (sctx->tcs_shader == sel)
749 return;
750
751 sctx->tcs_shader = sel;
752 }
753
754 static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
755 {
756 struct si_context *sctx = (struct si_context *)ctx;
757 struct si_shader_selector *sel = state;
758 bool enable_changed = !!sctx->tes_shader != !!sel;
759
760 if (sctx->tes_shader == sel)
761 return;
762
763 sctx->tes_shader = sel;
764 sctx->clip_regs.dirty = true;
765 sctx->last_rast_prim = -1; /* reset this so that it gets updated */
766
767 if (enable_changed)
768 si_shader_change_notify(sctx);
769 }
770
771 static void si_make_dummy_ps(struct si_context *sctx)
772 {
773 if (!sctx->dummy_pixel_shader) {
774 sctx->dummy_pixel_shader =
775 util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
776 TGSI_SEMANTIC_GENERIC,
777 TGSI_INTERPOLATE_CONSTANT);
778 }
779 }
780
781 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
782 {
783 struct si_context *sctx = (struct si_context *)ctx;
784 struct si_shader_selector *sel = state;
785
786 /* skip if supplied shader is one already in use */
787 if (sctx->ps_shader == sel)
788 return;
789
790 /* use a dummy shader if binding a NULL shader */
791 if (!sel) {
792 si_make_dummy_ps(sctx);
793 sel = sctx->dummy_pixel_shader;
794 }
795
796 sctx->ps_shader = sel;
797 }
798
/* Destroy a shader selector and every compiled variant in its list,
 * removing each variant's PM4 state from the context first.  The PM4
 * state slot depends on the hw stage the variant was compiled for
 * (as_ls / as_es in the key), mirroring si_shader_init_pm4_state(). */
static void si_delete_shader_selector(struct pipe_context *ctx,
				      struct si_shader_selector *sel)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_shader *p = sel->current, *c;

	while (p) {
		c = p->next_variant;
		switch (sel->type) {
		case PIPE_SHADER_VERTEX:
			if (p->key.vs.as_ls)
				si_pm4_delete_state(sctx, ls, p->pm4);
			else if (p->key.vs.as_es)
				si_pm4_delete_state(sctx, es, p->pm4);
			else
				si_pm4_delete_state(sctx, vs, p->pm4);
			break;
		case PIPE_SHADER_TESS_CTRL:
			si_pm4_delete_state(sctx, hs, p->pm4);
			break;
		case PIPE_SHADER_TESS_EVAL:
			if (p->key.tes.as_es)
				si_pm4_delete_state(sctx, es, p->pm4);
			else
				si_pm4_delete_state(sctx, vs, p->pm4);
			break;
		case PIPE_SHADER_GEOMETRY:
			/* A GS variant also owns its GS copy shader's VS state. */
			si_pm4_delete_state(sctx, gs, p->pm4);
			si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
			break;
		case PIPE_SHADER_FRAGMENT:
			si_pm4_delete_state(sctx, ps, p->pm4);
			break;
		}

		si_shader_destroy(ctx, p);
		free(p);
		p = c;
	}

	free(sel->tokens);
	free(sel);
}
842
843 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
844 {
845 struct si_context *sctx = (struct si_context *)ctx;
846 struct si_shader_selector *sel = (struct si_shader_selector *)state;
847
848 if (sctx->vs_shader == sel) {
849 sctx->vs_shader = NULL;
850 }
851
852 si_delete_shader_selector(ctx, sel);
853 }
854
855 static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
856 {
857 struct si_context *sctx = (struct si_context *)ctx;
858 struct si_shader_selector *sel = (struct si_shader_selector *)state;
859
860 if (sctx->gs_shader == sel) {
861 sctx->gs_shader = NULL;
862 }
863
864 si_delete_shader_selector(ctx, sel);
865 }
866
867 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
868 {
869 struct si_context *sctx = (struct si_context *)ctx;
870 struct si_shader_selector *sel = (struct si_shader_selector *)state;
871
872 if (sctx->ps_shader == sel) {
873 sctx->ps_shader = NULL;
874 }
875
876 si_delete_shader_selector(ctx, sel);
877 }
878
879 static void si_delete_tcs_shader(struct pipe_context *ctx, void *state)
880 {
881 struct si_context *sctx = (struct si_context *)ctx;
882 struct si_shader_selector *sel = (struct si_shader_selector *)state;
883
884 if (sctx->tcs_shader == sel) {
885 sctx->tcs_shader = NULL;
886 }
887
888 si_delete_shader_selector(ctx, sel);
889 }
890
891 static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
892 {
893 struct si_context *sctx = (struct si_context *)ctx;
894 struct si_shader_selector *sel = (struct si_shader_selector *)state;
895
896 if (sctx->tes_shader == sel) {
897 sctx->tes_shader = NULL;
898 }
899
900 si_delete_shader_selector(ctx, sel);
901 }
902
903 static void si_update_spi_map(struct si_context *sctx)
904 {
905 struct si_shader *ps = sctx->ps_shader->current;
906 struct si_shader *vs = si_get_vs_state(sctx);
907 struct tgsi_shader_info *psinfo = &ps->selector->info;
908 struct tgsi_shader_info *vsinfo = &vs->selector->info;
909 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
910 unsigned i, j, tmp;
911
912 for (i = 0; i < psinfo->num_inputs; i++) {
913 unsigned name = psinfo->input_semantic_name[i];
914 unsigned index = psinfo->input_semantic_index[i];
915 unsigned interpolate = psinfo->input_interpolate[i];
916 unsigned param_offset = ps->ps_input_param_offset[i];
917
918 if (name == TGSI_SEMANTIC_POSITION ||
919 name == TGSI_SEMANTIC_FACE)
920 /* Read from preloaded VGPRs, not parameters */
921 continue;
922
923 bcolor:
924 tmp = 0;
925
926 if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
927 (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
928 tmp |= S_028644_FLAT_SHADE(1);
929
930 if (name == TGSI_SEMANTIC_PCOORD ||
931 (name == TGSI_SEMANTIC_TEXCOORD &&
932 sctx->sprite_coord_enable & (1 << index))) {
933 tmp |= S_028644_PT_SPRITE_TEX(1);
934 }
935
936 for (j = 0; j < vsinfo->num_outputs; j++) {
937 if (name == vsinfo->output_semantic_name[j] &&
938 index == vsinfo->output_semantic_index[j]) {
939 tmp |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
940 break;
941 }
942 }
943
944 if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
945 /* No corresponding output found, load defaults into input.
946 * Don't set any other bits.
947 * (FLAT_SHADE=1 completely changes behavior) */
948 tmp = S_028644_OFFSET(0x20);
949 }
950
951 si_pm4_set_reg(pm4,
952 R_028644_SPI_PS_INPUT_CNTL_0 + param_offset * 4,
953 tmp);
954
955 if (name == TGSI_SEMANTIC_COLOR &&
956 ps->key.ps.color_two_side) {
957 name = TGSI_SEMANTIC_BCOLOR;
958 param_offset++;
959 goto bcolor;
960 }
961 }
962
963 si_pm4_set_state(sctx, spi, pm4);
964 }
965
/* Initialize state related to ESGS / GSVS ring buffers */
static void si_init_gs_rings(struct si_context *sctx)
{
	unsigned esgs_ring_size = 128 * 1024;
	unsigned gsvs_ring_size = 64 * 1024 * 1024;

	/* Called once per context, the first time a GS is used. */
	assert(!sctx->gs_rings);
	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);

	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
					     PIPE_USAGE_DEFAULT, esgs_ring_size);

	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
					     PIPE_USAGE_DEFAULT, gsvs_ring_size);

	/* The ring size registers moved on CIK; sizes are in 256-byte units. */
	if (sctx->b.chip_class >= CIK) {
		si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
			       esgs_ring_size / 256);
		si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
			       gsvs_ring_size / 256);
	} else {
		si_pm4_set_reg(sctx->gs_rings, R_0088C8_VGT_ESGS_RING_SIZE,
			       esgs_ring_size / 256);
		si_pm4_set_reg(sctx->gs_rings, R_0088CC_VGT_GSVS_RING_SIZE,
			       gsvs_ring_size / 256);
	}

	/* Bind the rings: ESGS is written by the ES stage (swizzled) and
	 * read by the GS stage; GSVS is read by the GS copy shader, which
	 * runs as the hw VS stage. */
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
			   sctx->esgs_ring, 0, esgs_ring_size,
			   true, true, 4, 64);
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
			   sctx->esgs_ring, 0, esgs_ring_size,
			   false, false, 0, 0);
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
			   sctx->gsvs_ring, 0, gsvs_ring_size,
			   false, false, 0, 0);
}
1003
1004 /**
1005 * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
1006 * otherwise.
1007 */
1008 static unsigned si_update_scratch_buffer(struct si_context *sctx,
1009 struct si_shader_selector *sel)
1010 {
1011 struct si_shader *shader;
1012 uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
1013
1014 if (!sel)
1015 return 0;
1016
1017 shader = sel->current;
1018
1019 /* This shader doesn't need a scratch buffer */
1020 if (shader->scratch_bytes_per_wave == 0)
1021 return 0;
1022
1023 /* This shader is already configured to use the current
1024 * scratch buffer. */
1025 if (shader->scratch_bo == sctx->scratch_buffer)
1026 return 0;
1027
1028 assert(sctx->scratch_buffer);
1029
1030 si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
1031
1032 /* Replace the shader bo with a new bo that has the relocs applied. */
1033 si_shader_binary_upload(sctx->screen, shader);
1034
1035 /* Update the shader state to use the new shader bo. */
1036 si_shader_init_pm4_state(shader);
1037
1038 r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
1039
1040 return 1;
1041 }
1042
1043 static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
1044 {
1045 if (!sctx->scratch_buffer)
1046 return 0;
1047
1048 return sctx->scratch_buffer->b.b.width0;
1049 }
1050
1051 static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
1052 struct si_shader_selector *sel)
1053 {
1054 if (!sel)
1055 return 0;
1056
1057 return sel->current->scratch_bytes_per_wave;
1058 }
1059
1060 static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
1061 {
1062
1063 return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
1064 si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
1065 si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
1066 }
1067
1068 static void si_update_spi_tmpring_size(struct si_context *sctx)
1069 {
1070 unsigned current_scratch_buffer_size =
1071 si_get_current_scratch_buffer_size(sctx);
1072 unsigned scratch_bytes_per_wave =
1073 si_get_max_scratch_bytes_per_wave(sctx);
1074 unsigned scratch_needed_size = scratch_bytes_per_wave *
1075 sctx->scratch_waves;
1076
1077 if (scratch_needed_size > 0) {
1078
1079 if (scratch_needed_size > current_scratch_buffer_size) {
1080 /* Create a bigger scratch buffer */
1081 pipe_resource_reference(
1082 (struct pipe_resource**)&sctx->scratch_buffer,
1083 NULL);
1084
1085 sctx->scratch_buffer =
1086 si_resource_create_custom(&sctx->screen->b.b,
1087 PIPE_USAGE_DEFAULT, scratch_needed_size);
1088 }
1089
1090 /* Update the shaders, so they are using the latest scratch. The
1091 * scratch buffer may have been changed since these shaders were
1092 * last used, so we still need to try to update them, even if
1093 * they require scratch buffers smaller than the current size.
1094 */
1095 if (si_update_scratch_buffer(sctx, sctx->ps_shader))
1096 si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
1097 if (si_update_scratch_buffer(sctx, sctx->gs_shader))
1098 si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
1099
1100 /* VS can be bound as ES or VS. */
1101 if (sctx->gs_shader) {
1102 if (si_update_scratch_buffer(sctx, sctx->vs_shader))
1103 si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
1104 } else {
1105 if (si_update_scratch_buffer(sctx, sctx->vs_shader))
1106 si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
1107 }
1108 }
1109
1110 /* The LLVM shader backend should be reporting aligned scratch_sizes. */
1111 assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
1112 "scratch size should already be aligned correctly.");
1113
1114 sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
1115 S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
1116 }
1117
/* Set up the ring buffer that holds tessellation factors written by the
 * TCS (hardware HS stage) for the fixed-function tessellator. */
static void si_init_tess_factor_ring(struct si_context *sctx)
{
	/* Created at most once per context; callers guard with
	 * "if (!sctx->tf_state)". */
	assert(!sctx->tf_state);
	sctx->tf_state = CALLOC_STRUCT(si_pm4_state);

	/* 32K per shader engine. */
	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
					   PIPE_USAGE_DEFAULT,
					   32768 * sctx->screen->b.info.max_se);
	/* Zero-initialize the ring before first use. */
	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
			     sctx->tf_ring->width0, fui(0), false);
	/* The size in dwords must fit in the SIZE field of VGT_TF_RING_SIZE. */
	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);

	/* The VGT_TF_* registers moved to a new address range on CIK.
	 * SIZE is programmed in dwords; MEMORY_BASE in units of 256 bytes
	 * (hence ">> 8"). */
	if (sctx->b.chip_class >= CIK) {
		si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
			       S_030938_SIZE(sctx->tf_ring->width0 / 4));
		si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
	} else {
		si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
			       S_008988_SIZE(sctx->tf_ring->width0 / 4));
		si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
	}
	/* Keep the ring BO resident whenever this state is emitted. */
	si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
	si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);

	/* Expose the ring to the TCS as an internal buffer resource. */
	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
			   SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
			   sctx->tf_ring->width0, false, false, 0, 0);

	/* Flush the VGT so the new ring configuration takes effect. */
	sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
}
1151
1152 static void si_update_vgt_shader_config(struct si_context *sctx)
1153 {
1154 /* Calculate the index of the config.
1155 * 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
1156 unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
1157 struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
1158
1159 if (!*pm4) {
1160 uint32_t stages = 0;
1161
1162 *pm4 = CALLOC_STRUCT(si_pm4_state);
1163
1164 if (sctx->tes_shader) {
1165 stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
1166 S_028B54_HS_EN(1);
1167
1168 if (sctx->gs_shader)
1169 stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
1170 S_028B54_GS_EN(1) |
1171 S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
1172 else
1173 stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
1174 } else if (sctx->gs_shader) {
1175 stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
1176 S_028B54_GS_EN(1) |
1177 S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
1178 }
1179
1180 si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
1181 if (!sctx->gs_shader)
1182 si_pm4_set_reg(*pm4, R_028A40_VGT_GS_MODE, 0);
1183 }
1184 si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
1185 }
1186
/* Select (and compile if necessary) the variants of all bound shaders,
 * bind their PM4 state to the appropriate hardware stages, set up any ring
 * buffers the current stage combination needs, and refresh dependent state
 * (SPI input map, scratch size, DB/MSAA dirty flags). */
void si_update_shaders(struct si_context *sctx)
{
	struct pipe_context *ctx = (struct pipe_context*)sctx;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

	/* Update stages before GS. */
	if (sctx->tes_shader) {
		/* Tessellation enabled: VS runs as LS, TCS as HS, and TES as
		 * either ES (with GS) or the hardware VS (without GS). */
		if (!sctx->tf_state)
			si_init_tess_factor_ring(sctx);

		/* VS as LS */
		si_shader_select(ctx, sctx->vs_shader);
		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);

		if (sctx->tcs_shader) {
			si_shader_select(ctx, sctx->tcs_shader);
			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
		} else {
			/* TODO: a fixed-function TCS is not generated yet. */
			assert(!"generate TCS shader");
		}

		si_shader_select(ctx, sctx->tes_shader);
		if (sctx->gs_shader) {
			/* TES as ES */
			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
		} else {
			/* TES as VS */
			si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
			/* The last geometry stage drives streamout. */
			sctx->b.streamout.stride_in_dw = sctx->tes_shader->so.stride;
		}
	} else if (sctx->gs_shader) {
		/* VS as ES */
		si_shader_select(ctx, sctx->vs_shader);
		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
	} else {
		/* VS as VS */
		si_shader_select(ctx, sctx->vs_shader);
		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
		sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
	}

	/* Update GS. */
	if (sctx->gs_shader) {
		si_shader_select(ctx, sctx->gs_shader);
		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
		/* The GS copy shader runs as the hardware VS stage. */
		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
		sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;

		if (!sctx->gs_rings)
			si_init_gs_rings(sctx);
		/* Switching to a different GS ring config requires a VGT flush. */
		if (sctx->emitted.named.gs_rings != sctx->gs_rings)
			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
		si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);

		/* GS write binding for the GSVS ring; offset and stride depend
		 * on the current GS's output layout. */
		si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
				   sctx->gsvs_ring,
				   sctx->gs_shader->gs_max_out_vertices *
				   sctx->gs_shader->info.num_outputs * 16,
				   64, true, true, 4, 16);
	} else {
		/* No GS: unbind everything GS-related. */
		si_pm4_bind_state(sctx, gs_rings, NULL);
		si_pm4_bind_state(sctx, gs, NULL);
		si_pm4_bind_state(sctx, es, NULL);
	}

	si_update_vgt_shader_config(sctx);

	si_shader_select(ctx, sctx->ps_shader);

	if (!sctx->ps_shader->current) {
		struct si_shader_selector *sel;

		/* use a dummy shader if compiling the shader (variant) failed */
		si_make_dummy_ps(sctx);
		sel = sctx->dummy_pixel_shader;
		si_shader_select(ctx, sel);
		sctx->ps_shader->current = sel->current;
	}

	si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);

	/* Re-derive the VS-output -> PS-input mapping when the shaders or the
	 * relevant rasterizer bits changed. */
	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
	    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
	    sctx->flatshade != rs->flatshade) {
		sctx->sprite_coord_enable = rs->sprite_coord_enable;
		sctx->flatshade = rs->flatshade;
		si_update_spi_map(sctx);
	}

	/* Scratch requirements may have changed with the new variants. */
	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
	    si_pm4_state_changed(sctx, gs)) {
		si_update_spi_tmpring_size(sctx);
	}

	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
		sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
		sctx->db_render_state.dirty = true;
	}

	if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
		sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
		sctx->msaa_config.dirty = true;

		/* NOTE(review): apparently SI also needs DB state re-emitted
		 * when smoothing toggles — confirm against si_state.c. */
		if (sctx->b.chip_class == SI)
			sctx->db_render_state.dirty = true;
	}
}
1294
1295 void si_init_shader_functions(struct si_context *sctx)
1296 {
1297 sctx->b.b.create_vs_state = si_create_vs_state;
1298 sctx->b.b.create_tcs_state = si_create_tcs_state;
1299 sctx->b.b.create_tes_state = si_create_tes_state;
1300 sctx->b.b.create_gs_state = si_create_gs_state;
1301 sctx->b.b.create_fs_state = si_create_fs_state;
1302
1303 sctx->b.b.bind_vs_state = si_bind_vs_shader;
1304 sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
1305 sctx->b.b.bind_tes_state = si_bind_tes_shader;
1306 sctx->b.b.bind_gs_state = si_bind_gs_shader;
1307 sctx->b.b.bind_fs_state = si_bind_ps_shader;
1308
1309 sctx->b.b.delete_vs_state = si_delete_vs_shader;
1310 sctx->b.b.delete_tcs_state = si_delete_tcs_shader;
1311 sctx->b.b.delete_tes_state = si_delete_tes_shader;
1312 sctx->b.b.delete_gs_state = si_delete_gs_shader;
1313 sctx->b.b.delete_fs_state = si_delete_ps_shader;
1314 }