tu: Support pipelines without a fragment shader
[mesa.git] / src / freedreno / vulkan / tu_pipeline.c
index c3a467ebbfca94c9a244e6084d7ae843b0c34472..3362945728928880de0c449b5874f918ea10a130 100644 (file)
@@ -325,6 +325,8 @@ tu_dynamic_state_bit(VkDynamicState state)
       return TU_DYNAMIC_STENCIL_WRITE_MASK;
    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
       return TU_DYNAMIC_STENCIL_REFERENCE;
       return TU_DYNAMIC_STENCIL_WRITE_MASK;
    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
       return TU_DYNAMIC_STENCIL_REFERENCE;
+   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+      return TU_DYNAMIC_SAMPLE_LOCATIONS;
    default:
       unreachable("invalid dynamic state");
       return 0;
    default:
       unreachable("invalid dynamic state");
       return 0;
@@ -646,7 +648,7 @@ tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader,
                    const struct ir3_shader_variant *gs)
 {
    bool has_gs = gs->type != MESA_SHADER_NONE;
                    const struct ir3_shader_variant *gs)
 {
    bool has_gs = gs->type != MESA_SHADER_NONE;
-   tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1);
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
    tu_cs_emit(cs, 0);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2);
    tu_cs_emit(cs, 0);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2);
@@ -730,7 +732,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
 static void
 tu6_emit_vs_system_values(struct tu_cs *cs,
                           const struct ir3_shader_variant *vs,
 static void
 tu6_emit_vs_system_values(struct tu_cs *cs,
                           const struct ir3_shader_variant *vs,
-                          const struct ir3_shader_variant *gs)
+                          const struct ir3_shader_variant *gs,
+                          bool primid_passthru)
 {
    const uint32_t vertexid_regid =
          ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
 {
    const uint32_t vertexid_regid =
          ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
@@ -753,7 +756,7 @@ tu6_emit_vs_system_values(struct tu_cs *cs,
    tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
    tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
                   0xfc00); /* VFD_CONTROL_5 */
    tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
    tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
                   0xfc00); /* VFD_CONTROL_5 */
-   tu_cs_emit(cs, 0x00000000); /* VFD_CONTROL_6 */
+   tu_cs_emit(cs, COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */
 }
 
 /* Add any missing varyings needed for stream-out. Otherwise varyings not
 }
 
 /* Add any missing varyings needed for stream-out. Otherwise varyings not
@@ -825,6 +828,10 @@ tu6_setup_streamout(const struct ir3_shader_variant *v,
       unsigned k = out->register_index;
       unsigned idx;
 
       unsigned k = out->register_index;
       unsigned idx;
 
+      /* Skip it, if there's an unused reg in the middle of outputs. */
+      if (v->outputs[k].regid == INVALID_REG)
+         continue;
+
       tf->ncomp[out->output_buffer] += out->num_components;
 
       /* linkage map sorted by order frag shader wants things, so
       tf->ncomp[out->output_buffer] += out->num_components;
 
       /* linkage map sorted by order frag shader wants things, so
@@ -891,6 +898,8 @@ tu6_emit_link_map(struct tu_cs *cs,
    int size = DIV_ROUND_UP(num_loc, 4);
 
    size = (MIN2(size + base, consumer->constlen) - base) * 4;
    int size = DIV_ROUND_UP(num_loc, 4);
 
    size = (MIN2(size + base, consumer->constlen) - base) * 4;
+   if (size <= 0)
+      return;
 
    tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0, size,
                   patch_locs);
 
    tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0, size,
                   patch_locs);
@@ -921,23 +930,22 @@ tu6_emit_vpc(struct tu_cs *cs,
    bool has_gs = gs->type != MESA_SHADER_NONE;
    const struct ir3_shader_variant *last_shader = has_gs ? gs : vs;
    struct ir3_shader_linkage linkage = { 0 };
    bool has_gs = gs->type != MESA_SHADER_NONE;
    const struct ir3_shader_variant *last_shader = has_gs ? gs : vs;
    struct ir3_shader_linkage linkage = { 0 };
-   ir3_link_shaders(&linkage, last_shader, fs);
+   ir3_link_shaders(&linkage, last_shader, fs, true);
 
    if (last_shader->shader->stream_output.num_outputs)
       tu6_link_streamout(&linkage, last_shader);
 
 
    if (last_shader->shader->stream_output.num_outputs)
       tu6_link_streamout(&linkage, last_shader);
 
-   BITSET_DECLARE(vpc_var_enables, 128) = { 0 };
-   for (uint32_t i = 0; i < linkage.cnt; i++) {
-      const uint32_t comp_count = util_last_bit(linkage.var[i].compmask);
-      for (uint32_t j = 0; j < comp_count; j++)
-         BITSET_SET(vpc_var_enables, linkage.var[i].loc + j);
-   }
+   /* We do this after linking shaders in order to know whether PrimID
+    * passthrough needs to be enabled.
+    */
+   bool primid_passthru = linkage.primid_loc != 0xff;
+   tu6_emit_vs_system_values(cs, vs, gs, primid_passthru);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
-   tu_cs_emit(cs, ~vpc_var_enables[0]);
-   tu_cs_emit(cs, ~vpc_var_enables[1]);
-   tu_cs_emit(cs, ~vpc_var_enables[2]);
-   tu_cs_emit(cs, ~vpc_var_enables[3]);
+   tu_cs_emit(cs, ~linkage.varmask[0]);
+   tu_cs_emit(cs, ~linkage.varmask[1]);
+   tu_cs_emit(cs, ~linkage.varmask[2]);
+   tu_cs_emit(cs, ~linkage.varmask[3]);
 
    /* a6xx finds position/pointsize at the end */
    const uint32_t position_regid =
 
    /* a6xx finds position/pointsize at the end */
    const uint32_t position_regid =
@@ -990,10 +998,14 @@ tu6_emit_vpc(struct tu_cs *cs,
       tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count);
    tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);
 
       tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count);
    tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);
 
+   tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1);
+   tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU));
+
    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
    tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) |
                      (fs->total_in > 0 ? A6XX_VPC_CNTL_0_VARYING : 0) |
    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
    tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) |
                      (fs->total_in > 0 ? A6XX_VPC_CNTL_0_VARYING : 0) |
-                     0xff00ff00);
+                     A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) |
+                     A6XX_VPC_CNTL_0_UNKLOC(0xff));
 
    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_PACK, 1);
    tu_cs_emit(cs, A6XX_VPC_PACK_POSITIONLOC(position_loc) |
 
    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_PACK, 1);
    tu_cs_emit(cs, A6XX_VPC_PACK_POSITIONLOC(position_loc) |
@@ -1068,7 +1080,7 @@ tu6_emit_vpc(struct tu_cs *cs,
       tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9B07, 1);
       tu_cs_emit(cs, 0);
 
       tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9B07, 1);
       tu_cs_emit(cs, 0);
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1);
+      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
       tu_cs_emit(cs, vs->shader->output_size);
    }
 
       tu_cs_emit(cs, vs->shader->output_size);
    }
 
@@ -1546,7 +1558,6 @@ tu6_emit_program(struct tu_cs *cs,
    tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs);
    tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs);
 
    tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs);
    tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs);
 
-   tu6_emit_vs_system_values(cs, vs, gs);
    tu6_emit_vpc(cs, vs, gs, fs, binning_pass, tf);
    tu6_emit_vpc_varying_modes(cs, fs, binning_pass);
    tu6_emit_fs_inputs(cs, fs);
    tu6_emit_vpc(cs, vs, gs, fs, binning_pass, tf);
    tu6_emit_vpc_varying_modes(cs, fs, binning_pass);
    tu6_emit_fs_inputs(cs, fs);
@@ -1733,6 +1744,47 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
                      A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
 }
 
                      A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
 }
 
+/*
+ * Program the sample-location state in the GRAS, RB and SP_TP blocks.
+ *
+ * cs:       command stream that receives the register writes.
+ * samp_loc: sample locations to program, or NULL to write zero to each of
+ *           the three *_SAMPLE_CONFIG registers.
+ *
+ * When samp_loc is non-NULL, each block gets a two-dword write starting at
+ * its *_SAMPLE_CONFIG register: the LOCATION_ENABLE config word followed by
+ * the packed per-sample locations. Only a 1x1 location grid with exactly one
+ * location per sample is accepted (asserted below).
+ */
+void
+tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc)
+{
+   if (samp_loc != NULL) {
+      assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount);
+      assert(samp_loc->sampleLocationGridSize.width == 1);
+      assert(samp_loc->sampleLocationGridSize.height == 1);
+
+      const uint32_t config_dword = A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE;
+
+      /* Each sample's X/Y pair is OR'ed into one dword, 8 bits per sample.
+       * NOTE(review): a count above 4 would shift by >= 32 bits; presumably
+       * the advertised sample counts rule that out -- confirm.
+       */
+      uint32_t packed_locations = 0;
+      for (uint32_t sample = 0; sample < samp_loc->sampleLocationsCount; sample++) {
+         packed_locations |=
+            (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[sample].x) |
+             A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[sample].y))
+            << sample*8;
+      }
+
+      /* The same config/location pair is written to all three blocks. */
+      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2);
+      tu_cs_emit(cs, config_dword);
+      tu_cs_emit(cs, packed_locations);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2);
+      tu_cs_emit(cs, config_dword);
+      tu_cs_emit(cs, packed_locations);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2);
+      tu_cs_emit(cs, config_dword);
+      tu_cs_emit(cs, packed_locations);
+   } else {
+      /* No locations supplied: write zero to each config register. */
+      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1);
+      tu_cs_emit(cs, 0);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1);
+      tu_cs_emit(cs, 0);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1);
+      tu_cs_emit(cs, 0);
+   }
+}
+
 static void
 tu6_emit_gras_unknowns(struct tu_cs *cs)
 {
 static void
 tu6_emit_gras_unknowns(struct tu_cs *cs)
 {
@@ -2064,7 +2116,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
    for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
         stage > MESA_SHADER_NONE; stage--) {
       const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
    for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
         stage > MESA_SHADER_NONE; stage--) {
       const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
-      if (!stage_info)
+      if (!stage_info && stage != MESA_SHADER_FRAGMENT)
          continue;
 
       struct tu_shader *shader =
          continue;
 
       struct tu_shader *shader =
@@ -2405,7 +2457,7 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
                                      : &dummy_blend_info;
 
    struct tu_cs blend_cs;
                                      : &dummy_blend_info;
 
    struct tu_cs blend_cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 9, &blend_cs);
+   tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 18, &blend_cs);
 
    uint32_t blend_enable_mask;
    tu6_emit_rb_mrt_controls(&blend_cs, blend_info,
 
    uint32_t blend_enable_mask;
    tu6_emit_rb_mrt_controls(&blend_cs, blend_info,
@@ -2415,6 +2467,17 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
    if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS))
       tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants);
 
    if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS))
       tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants);
 
+   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) {
+      const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
+         vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+      const VkSampleLocationsInfoEXT *samp_loc = NULL;
+
+      if (sample_locations && sample_locations->sampleLocationsEnable)
+         samp_loc = &sample_locations->sampleLocationsInfo;
+
+      tu6_emit_sample_locations(&blend_cs, samp_loc);
+   }
+
    tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info);
 
    pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);
    tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info);
 
    pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);