*    more than 2 times the number of instance count.
        */
       assert((devinfo->max_tcs_threads / 2) > tcs_prog_data->instances);
+      hs.DispatchGRFStartRegisterForURBData = prog_data->dispatch_grf_start_reg & 0x1f;
+      hs.DispatchGRFStartRegisterForURBData5 = prog_data->dispatch_grf_start_reg >> 5;
 #endif
 
       hs.InstanceCount = tcs_prog_data->instances - 1;
 
 
    if (compiler->use_tcs_8_patch &&
        nir->info.tess.tcs_vertices_out <= (devinfo->gen >= 12 ? 32 : 16) &&
-       2 + has_primitive_id + key->input_vertices <= 31) {
+       2 + has_primitive_id + key->input_vertices <= (devinfo->gen >= 12 ? 63 : 31)) {
       /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, the
        * "Instance" field limits the number of output vertices to [1, 16] on
        * gen11 and below, or [1, 32] on gen12 and above. Secondly, the
 
       <value name="9-12 Samplers" value="3"/>
       <value name="13-16 Samplers" value="4"/>
     </field>
-    <field name="Instance Count" start="64" end="67" type="uint"/>
+    <field name="Instance Count" start="64" end="68" type="uint"/>
     <field name="Maximum Number of Threads" start="72" end="80" type="uint"/>
     <field name="Statistics Enable" start="93" end="93" type="bool"/>
     <field name="Enable" start="95" end="95" type="bool"/>
 
       hs.VertexURBEntryReadLength = 0;
       hs.VertexURBEntryReadOffset = 0;
       hs.DispatchGRFStartRegisterForURBData =
-         tcs_prog_data->base.base.dispatch_grf_start_reg;
+         tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f;
+#if GEN_GEN >= 12
+      hs.DispatchGRFStartRegisterForURBData5 =
+         tcs_prog_data->base.base.dispatch_grf_start_reg >> 5;
+#endif
+
 
       hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
       hs.ScratchSpaceBasePointer =