i965/vec4: use the IR's execution size
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_generator.cpp
index 695c4df258b313f320eb10a7bda6feef1848789e..707bd91882e7bf58e182db3bdce691f65b305737 100644 (file)
@@ -112,7 +112,7 @@ generate_tex(struct brw_codegen *p,
              struct brw_reg surface_index,
              struct brw_reg sampler_index)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
    int msg_type = -1;
 
    if (devinfo->gen >= 5) {
@@ -295,11 +295,15 @@ generate_tex(struct brw_codegen *p,
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_access_mode(p, BRW_ALIGN_1);
 
-      if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) {
+      if (brw_regs_equal(&surface_reg, &sampler_reg)) {
          brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
       } else {
-         brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
-         brw_OR(p, addr, addr, surface_reg);
+         if (sampler_reg.file == BRW_IMMEDIATE_VALUE) {
+            brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8));
+         } else {
+            brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
+            brw_OR(p, addr, addr, surface_reg);
+         }
       }
       if (base_binding_table_index)
          brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
@@ -485,10 +489,13 @@ generate_gs_svb_write(struct brw_codegen *p,
    bool final_write = inst->sol_final_write;
 
    brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_4);
    /* Copy Vertex data into M0.x */
    brw_MOV(p, stride(dst, 4, 4, 1),
            stride(retype(src0, BRW_REGISTER_TYPE_UD), 4, 4, 1));
+   brw_pop_insn_state(p);
 
+   brw_push_insn_state(p);
    /* Send SVB Write */
    brw_svb_write(p,
                  final_write ? src1 : brw_null_reg(), /* dest == src1 */
@@ -702,8 +709,10 @@ generate_gs_ff_sync(struct brw_codegen *p,
    brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0));
 
    /* src1 is not an immediate when we use transform feedback */
-   if (src1.file != BRW_IMMEDIATE_VALUE)
+   if (src1.file != BRW_IMMEDIATE_VALUE) {
+      brw_set_default_exec_size(p, BRW_EXECUTE_4);
       brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
+   }
 
    brw_pop_insn_state(p);
 }
@@ -723,7 +732,7 @@ generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
 static void
 generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
    const bool ivb = devinfo->is_ivybridge || devinfo->is_baytrail;
 
    /* "Instance Count" comes as part of the payload in r0.2 bits 23:17.
@@ -754,7 +763,7 @@ generate_tcs_urb_write(struct brw_codegen *p,
                        vec4_instruction *inst,
                        struct brw_reg urb_header)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
 
    brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, send, brw_null_reg());
@@ -924,7 +933,7 @@ generate_vec4_urb_read(struct brw_codegen *p,
                        struct brw_reg dst,
                        struct brw_reg header)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
 
    assert(header.file == BRW_GENERAL_REGISTER_FILE);
    assert(header.type == BRW_REGISTER_TYPE_UD);
@@ -949,7 +958,7 @@ generate_tcs_release_input(struct brw_codegen *p,
                            struct brw_reg vertex,
                            struct brw_reg is_unpaired)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
 
    assert(vertex.file == BRW_IMMEDIATE_VALUE);
    assert(vertex.type == BRW_REGISTER_TYPE_UD);
@@ -1029,7 +1038,7 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
                                    struct brw_vue_prog_data *prog_data,
                                    struct brw_reg dst)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
    const bool ivb = devinfo->is_ivybridge || devinfo->is_baytrail;
    struct brw_reg m0_2 = get_element_ud(dst, 2);
    unsigned instances = ((struct brw_tcs_prog_data *) prog_data)->instances;
@@ -1118,7 +1127,7 @@ generate_scratch_read(struct brw_codegen *p,
                       struct brw_reg dst,
                       struct brw_reg index)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
    struct brw_reg header = brw_vec8_grf(0, 0);
 
    gen6_resolve_implied_move(p, &header, inst->base_mrf);
@@ -1135,6 +1144,11 @@ generate_scratch_read(struct brw_codegen *p,
    else
       msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
+   const unsigned target_cache =
+      devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
+      devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
+      BRW_DATAPORT_READ_TARGET_RENDER_CACHE;
+
    /* Each of the 8 channel enables is considered for whether each
     * dword is written.
     */
@@ -1146,8 +1160,7 @@ generate_scratch_read(struct brw_codegen *p,
    brw_set_dp_read_message(p, send,
                            brw_scratch_surface_idx(p),
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
-                          msg_type,
-                          BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+                          msg_type, target_cache,
                           2, /* mlen */
                            true, /* header_present */
                           1 /* rlen */);
@@ -1160,7 +1173,11 @@ generate_scratch_write(struct brw_codegen *p,
                        struct brw_reg src,
                        struct brw_reg index)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
+   const unsigned target_cache =
+      (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
+       devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
+       BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
    struct brw_reg header = brw_vec8_grf(0, 0);
    bool write_commit;
 
@@ -1220,6 +1237,7 @@ generate_scratch_write(struct brw_codegen *p,
                             brw_scratch_surface_idx(p),
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
+                            target_cache,
                            3, /* mlen */
                            true, /* header present */
                            false, /* not a render target write */
@@ -1236,7 +1254,10 @@ generate_pull_constant_load(struct brw_codegen *p,
                             struct brw_reg index,
                             struct brw_reg offset)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
+   const unsigned target_cache =
+      (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_SAMPLER_CACHE :
+       BRW_DATAPORT_READ_TARGET_DATA_CACHE);
    assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.ud;
@@ -1282,7 +1303,7 @@ generate_pull_constant_load(struct brw_codegen *p,
                           surf_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           target_cache,
                           2, /* mlen */
                            true, /* header_present */
                           1 /* rlen */);
@@ -1400,39 +1421,60 @@ generate_mov_indirect(struct brw_codegen *p,
                       struct brw_reg indirect, struct brw_reg length)
 {
    assert(indirect.type == BRW_REGISTER_TYPE_UD);
+   assert(p->devinfo->gen >= 6);
 
    unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2);
 
    /* This instruction acts in align1 mode */
-   assert(inst->force_writemask_all || reg.writemask == 0xf);
+   assert(dst.writemask == WRITEMASK_XYZW);
 
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   if (indirect.file == BRW_IMMEDIATE_VALUE) {
+      imm_byte_offset += indirect.ud;
 
-   struct brw_reg addr = vec2(brw_address_reg(0));
+      reg.nr = imm_byte_offset / REG_SIZE;
+      reg.subnr = (imm_byte_offset / (REG_SIZE / 2)) % 2;
+      unsigned shift = (imm_byte_offset / 4) % 4;
+      reg.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
 
-   /* We need to move the indirect value into the address register.  In order
-    * to make things make some sense, we want to respect at least the X
-    * component of the swizzle.  In order to do that, we need to convert the
-    * subnr (probably 0) to an align1 subnr and add in the swizzle.  We then
-    * use a region of <8,4,0>:uw to pick off the first 2 bytes of the indirect
-    * and splat it out to all four channels of the given half of a0.
-    */
-   assert(brw_is_single_value_swizzle(indirect.swizzle));
-   indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)) * 2;
-   indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0);
+      brw_MOV(p, dst, reg);
+   } else {
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
-   brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset));
+      struct brw_reg addr = vec8(brw_address_reg(0));
 
-   /* Use a <4,1> region Vx1 region*/
-   struct brw_reg src = brw_VxH_indirect(0, 0);
-   src.width = BRW_WIDTH_4;
-   src.hstride = BRW_HORIZONTAL_STRIDE_1;
+      /* We need to move the indirect value into the address register.  In
+       * order to make things make some sense, we want to respect at least the
+       * X component of the swizzle.  In order to do that, we need to convert
+       * the subnr (probably 0) to an align1 subnr and add in the swizzle.
+       */
+      assert(brw_is_single_value_swizzle(indirect.swizzle));
+      indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0));
 
-   brw_MOV(p, dst, retype(src, reg.type));
+      /* We then use a region of <8,4,0>:uw to pick off the first 2 bytes of
+       * the indirect and splat it out to all four channels of the given half
+       * of a0.
+       */
+      indirect.subnr *= 2;
+      indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0);
+      brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset));
+
+      /* Now we need to incorporate the swizzle from the source register */
+      if (reg.swizzle != BRW_SWIZZLE_XXXX) {
+         uint32_t uv_swiz = BRW_GET_SWZ(reg.swizzle, 0) << 2 |
+                            BRW_GET_SWZ(reg.swizzle, 1) << 6 |
+                            BRW_GET_SWZ(reg.swizzle, 2) << 10 |
+                            BRW_GET_SWZ(reg.swizzle, 3) << 14;
+         uv_swiz |= uv_swiz << 16;
+
+         brw_ADD(p, addr, addr, brw_imm_uv(uv_swiz));
+      }
 
-   brw_pop_insn_state(p);
+      brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), reg.type));
+
+      brw_pop_insn_state(p);
+   }
 }
 
 static void
@@ -1443,12 +1485,13 @@ generate_code(struct brw_codegen *p,
               struct brw_vue_prog_data *prog_data,
               const struct cfg_t *cfg)
 {
-   const struct brw_device_info *devinfo = p->devinfo;
+   const struct gen_device_info *devinfo = p->devinfo;
    const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage);
    bool debug_flag = INTEL_DEBUG &
       intel_debug_flag_for_shader_stage(nir->stage);
    struct annotation_info annotation;
    memset(&annotation, 0, sizeof(annotation));
+   int spill_count = 0, fill_count = 0;
    int loop_count = 0;
 
    foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
@@ -1468,37 +1511,13 @@ generate_code(struct brw_codegen *p,
       brw_set_default_saturate(p, inst->saturate);
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
+      brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
       assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
 
       unsigned pre_emit_nr_insn = p->nr_insn;
 
-      if (dst.width == BRW_WIDTH_4) {
-         /* This happens in attribute fixups for "dual instanced" geometry
-          * shaders, since they use attributes that are vec4's.  Since the exec
-          * width is only 4, it's essential that the caller set
-          * force_writemask_all in order to make sure the instruction is executed
-          * regardless of which channels are enabled.
-          */
-         assert(inst->force_writemask_all);
-
-         /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
-          * the following register region restrictions (from Graphics BSpec:
-          * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
-          * > Register Region Restrictions)
-          *
-          *     1. ExecSize must be greater than or equal to Width.
-          *
-          *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
-          *        to Width * HorzStride."
-          */
-         for (int i = 0; i < 3; i++) {
-            if (src[i].file == BRW_GENERAL_REGISTER_FILE)
-               src[i] = stride(src[i], 4, 4, 1);
-         }
-      }
-
       switch (inst->opcode) {
       case VEC4_OPCODE_UNPACK_UNIFORM:
       case BRW_OPCODE_MOV:
@@ -1608,6 +1627,9 @@ generate_code(struct brw_codegen *p,
          /* FBL only supports UD type for dst. */
          brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
          break;
+      case BRW_OPCODE_LZD:
+         brw_LZD(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CBIT:
          assert(devinfo->gen >= 7);
          /* CBIT only supports UD type for dst. */
@@ -1726,10 +1748,12 @@ generate_code(struct brw_codegen *p,
 
       case SHADER_OPCODE_GEN4_SCRATCH_READ:
          generate_scratch_read(p, inst, dst, src[0]);
+         fill_count++;
          break;
 
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
          generate_scratch_write(p, inst, dst, src[0], src[1]);
+         spill_count++;
          break;
 
       case VS_OPCODE_PULL_CONSTANT_LOAD:
@@ -1852,11 +1876,17 @@ generate_code(struct brw_codegen *p,
          brw_memory_fence(p, dst);
          break;
 
-      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
-         brw_find_live_channel(p, dst);
+      case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
+         const struct brw_reg mask =
+            brw_stage_has_packed_dispatch(devinfo, nir->stage,
+                                          &prog_data->base) ? brw_imm_ud(~0u) :
+            brw_dmask_reg();
+         brw_find_live_channel(p, dst, mask);
          break;
+      }
 
       case SHADER_OPCODE_BROADCAST:
+         assert(inst->force_writemask_all);
          brw_broadcast(p, dst, src[0], src[1]);
          break;
 
@@ -1880,6 +1910,100 @@ generate_code(struct brw_codegen *p,
          break;
       }
 
+      case VEC4_OPCODE_FROM_DOUBLE: {
+         assert(type_sz(src[0].type) == 8);
+         assert(type_sz(dst.type) == 4);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+         dst.hstride = BRW_HORIZONTAL_STRIDE_2;
+         dst.width = BRW_WIDTH_4;
+         src[0].vstride = BRW_VERTICAL_STRIDE_4;
+         src[0].width = BRW_WIDTH_4;
+         brw_MOV(p, dst, src[0]);
+
+         struct brw_reg dst_as_src = dst;
+         dst.hstride = BRW_HORIZONTAL_STRIDE_1;
+         dst.width = BRW_WIDTH_8;
+         brw_MOV(p, dst, dst_as_src);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
+      case VEC4_OPCODE_TO_DOUBLE: {
+         assert(type_sz(src[0].type) == 4);
+         assert(type_sz(dst.type) == 8);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+         struct brw_reg tmp = retype(dst, src[0].type);
+         tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
+         tmp.width = BRW_WIDTH_4;
+         src[0].vstride = BRW_VERTICAL_STRIDE_4;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
+         src[0].width = BRW_WIDTH_4;
+         brw_MOV(p, tmp, src[0]);
+
+         tmp.vstride = BRW_VERTICAL_STRIDE_8;
+         tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
+         tmp.width = BRW_WIDTH_4;
+         brw_MOV(p, dst, tmp);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
+      case VEC4_OPCODE_PICK_LOW_32BIT:
+      case VEC4_OPCODE_PICK_HIGH_32BIT: {
+         /* Stores the low/high 32-bit of each 64-bit element in src[0] into
+          * dst using ALIGN1 mode and a <8,4,2>:UD region on the source.
+          */
+         assert(type_sz(src[0].type) == 8);
+         assert(type_sz(dst.type) == 4);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+         dst = retype(dst, BRW_REGISTER_TYPE_UD);
+         dst.hstride = BRW_HORIZONTAL_STRIDE_1;
+
+         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
+         if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
+            src[0] = suboffset(src[0], 1);
+         src[0].vstride = BRW_VERTICAL_STRIDE_8;
+         src[0].width = BRW_WIDTH_4;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
+         brw_MOV(p, dst, src[0]);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
+      case VEC4_OPCODE_SET_LOW_32BIT:
+      case VEC4_OPCODE_SET_HIGH_32BIT: {
+         /* Reads consecutive 32-bit elements from src[0] and writes
+          * them to the low/high 32-bit of each 64-bit element in dst.
+          */
+         assert(type_sz(src[0].type) == 4);
+         assert(type_sz(dst.type) == 8);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+         dst = retype(dst, BRW_REGISTER_TYPE_UD);
+         if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
+            dst = suboffset(dst, 1);
+         dst.hstride = BRW_HORIZONTAL_STRIDE_2;
+
+         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
+         src[0].vstride = BRW_VERTICAL_STRIDE_4;
+         src[0].width = BRW_WIDTH_4;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
+         brw_MOV(p, dst, src[0]);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
       case VEC4_OPCODE_PACK_BYTES: {
          /* Is effectively:
           *
@@ -1965,7 +2089,6 @@ generate_code(struct brw_codegen *p,
       case TCS_OPCODE_SRC0_010_IS_ZERO:
          /* If src_reg had stride like fs_reg, we wouldn't need this. */
          brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0));
-         brw_inst_set_cond_modifier(devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
          break;
 
       case TCS_OPCODE_RELEASE_INPUT:
@@ -1983,6 +2106,14 @@ generate_code(struct brw_codegen *p,
 
       case SHADER_OPCODE_MOV_INDIRECT:
          generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]);
+         break;
+
+      case BRW_OPCODE_DIM:
+         assert(devinfo->is_haswell);
+         assert(src[0].type == BRW_REGISTER_TYPE_DF);
+         assert(dst.type == BRW_REGISTER_TYPE_DF);
+         brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
+         break;
 
       default:
          unreachable("Unsupported opcode");
@@ -2006,7 +2137,7 @@ generate_code(struct brw_codegen *p,
       }
    }
 
-   brw_set_uip_jip(p);
+   brw_set_uip_jip(p, 0);
    annotation_finalize(&annotation, p->next_insn_offset);
 
 #ifndef NDEBUG
@@ -2022,13 +2153,13 @@ generate_code(struct brw_codegen *p,
 
    if (unlikely(debug_flag)) {
       fprintf(stderr, "Native code for %s %s shader %s:\n",
-              nir->info.label ? nir->info.label : "unnamed",
-              _mesa_shader_stage_to_string(nir->stage), nir->info.name);
+              nir->info->label ? nir->info->label : "unnamed",
+              _mesa_shader_stage_to_string(nir->stage), nir->info->name);
 
-      fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles."
-                      "Compacted %d to %d bytes (%.0f%%)\n",
-              stage_abbrev,
-              before_size / 16, loop_count, cfg->cycle_count, before_size, after_size,
+      fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
+                      "spills:fills. Compacted %d to %d bytes (%.0f%%)\n",
+              stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
+              spill_count, fill_count, before_size, after_size,
               100.0f * (before_size - after_size) / before_size);
 
       dump_assembly(p->store, annotation.ann_count, annotation.ann,
@@ -2039,10 +2170,11 @@ generate_code(struct brw_codegen *p,
 
    compiler->shader_debug_log(log_data,
                               "%s vec4 shader: %d inst, %d loops, %u cycles, "
-                              "compacted %d to %d bytes.",
+                              "%d:%d spills:fills, compacted %d to %d bytes.",
                               stage_abbrev, before_size / 16,
-                              loop_count, cfg->cycle_count,
-                              before_size, after_size);
+                              loop_count, cfg->cycle_count, spill_count,
+                              fill_count, before_size, after_size);
+
 }
 
 extern "C" const unsigned *