default_state.exec_size = save_exec_size;
}
+/**
+ * Change the register's data type from UD to HF, doubling the strides in order
+ * to compensate for halving the data type width.
+ *
+ * With the strides doubled and the subregister offset unchanged, each HF
+ * element of the returned region lines up with the low word of the
+ * corresponding 32-bit UD channel; callers reach the high word by adding
+ * 2 bytes to subnr (see generate_unpack_half_2x16_split).
+ */
+static struct brw_reg
+ud_reg_to_hf(struct brw_reg r)
+{
+ assert(r.type == BRW_REGISTER_TYPE_UD);
+ r.type = BRW_REGISTER_TYPE_HF;
+
+ /* The BRW_*_STRIDE enums are defined so that incrementing the field
+ * doubles the real stride.
+ */
+ if (r.hstride != 0)
+ ++r.hstride;
+ if (r.vstride != 0)
+ ++r.vstride;
+
+ /* A stride of 0 (scalar/broadcast) needs no adjustment: 2 * 0 == 0. */
+ return r;
+}
+
+/**
+ * Generate code for FS_OPCODE_PACK_HALF_2x16_SPLIT: convert the float
+ * sources x and y to half-float and pack them into the low and high
+ * 16-bit words, respectively, of each 32-bit channel of dst
+ * (packHalf2x16's output form, 0xhhhhllll).
+ *
+ * inst is unused here; presumably kept for signature consistency with the
+ * other generate_* methods.
+ */
+void
+gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg x,
+ struct brw_reg y)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_UD);
+ assert(x.type == BRW_REGISTER_TYPE_F);
+ assert(y.type == BRW_REGISTER_TYPE_F);
+
+ /* HF view of dst: a MOV through it converts F -> HF and writes only the
+ * low word of each 32-bit channel (strides doubled, subnr unchanged).
+ */
+ struct brw_reg dst_hf = ud_reg_to_hf(dst);
+
+ /* Give each 32-bit channel of dst the form below, where "." means
+ * unchanged.
+ * 0x....hhhh
+ */
+ MOV(dst_hf, y);
+
+ /* Shift the full UD channel (hence dst, not dst_hf) so y's half-float
+ * lands in the high word:
+ * 0xhhhh0000
+ */
+ SHL(dst, dst, brw_imm_ud(16u));
+
+ /* And, finally the form of packHalf2x16's output:
+ * 0xhhhhllll
+ */
+ MOV(dst_hf, x);
+}
+
+/**
+ * Generate code for FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X and _Y: convert one
+ * 16-bit half of each 32-bit src channel — the low word for _X, the high
+ * word for _Y — from half-float to float in dst.
+ */
+void
+gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_F);
+ assert(src.type == BRW_REGISTER_TYPE_UD);
+
+ /* HF view of src: reads convert HF -> F, sampling the low word of each
+ * 32-bit channel (strides doubled, subnr unchanged).
+ */
+ struct brw_reg src_hf = ud_reg_to_hf(src);
+
+ /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
+ * For the Y case, we wish to access only the upper word; therefore
+ * a 16-bit subregister offset is needed.
+ */
+ assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+ inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+ if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+ src_hf.subnr += 2;
+
+ MOV(dst, src_hf);
+}
+
void
gen8_fs_generator::generate_code(exec_list *instructions)
{
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
if (prog) {
- printf("Native code for fragment shader %d (SIMD%d dispatch):\n",
+ fprintf(stderr,
+ "Native code for %s fragment shader %d (SIMD%d dispatch):\n",
+ shader_prog->Label ? shader_prog->Label : "unnamed",
shader_prog->Name, dispatch_width);
} else if (fp) {
- printf("Native code for fragment program %d (SIMD%d dispatch):\n",
- prog->Id, dispatch_width);
+ fprintf(stderr,
+ "Native code for fragment program %d (SIMD%d dispatch):\n",
+ prog->Id, dispatch_width);
} else {
- printf("Native code for blorp program (SIMD%d dispatch):\n",
- dispatch_width);
+ fprintf(stderr, "Native code for blorp program (SIMD%d dispatch):\n",
+ dispatch_width);
}
}
bblock_t *block = link->block;
if (block->start == ir) {
- printf(" START B%d", block->block_num);
+ fprintf(stderr, " START B%d", block->block_num);
foreach_list(predecessor_node, &block->parents) {
bblock_link *predecessor_link =
(bblock_link *)predecessor_node;
bblock_t *predecessor_block = predecessor_link->block;
- printf(" <-B%d", predecessor_block->block_num);
+ fprintf(stderr, " <-B%d", predecessor_block->block_num);
}
- printf("\n");
+ fprintf(stderr, "\n");
}
}
if (last_annotation_ir != ir->ir) {
last_annotation_ir = ir->ir;
if (last_annotation_ir) {
- printf(" ");
+ fprintf(stderr, " ");
if (prog) {
- ((ir_instruction *) ir->ir)->print();
+ ((ir_instruction *) ir->ir)->fprint(stderr);
} else if (prog) {
const prog_instruction *fpi;
fpi = (const prog_instruction *) ir->ir;
- printf("%d: ", (int)(fpi - prog->Instructions));
- _mesa_fprint_instruction_opt(stdout,
+ fprintf(stderr, "%d: ", (int)(fpi - prog->Instructions));
+ _mesa_fprint_instruction_opt(stderr,
fpi,
0, PROG_PRINT_DEBUG, NULL);
}
- printf("\n");
+ fprintf(stderr, "\n");
}
}
if (last_annotation_string != ir->annotation) {
last_annotation_string = ir->annotation;
if (last_annotation_string)
- printf(" %s\n", last_annotation_string);
+ fprintf(stderr, " %s\n", last_annotation_string);
}
}
break;
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
- assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
+ generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
break;
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
- assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
+ generate_unpack_half_2x16_split(ir, dst, src[0]);
break;
case FS_OPCODE_PLACEHOLDER_HALT:
}
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- disassemble(stdout, last_native_inst_offset, next_inst_offset);
+ disassemble(stderr, last_native_inst_offset, next_inst_offset);
foreach_list(node, &cfg->block_list) {
bblock_link *link = (bblock_link *)node;
bblock_t *block = link->block;
if (block->end == ir) {
- printf(" END B%d", block->block_num);
+ fprintf(stderr, " END B%d", block->block_num);
foreach_list(successor_node, &block->children) {
bblock_link *successor_link =
(bblock_link *)successor_node;
bblock_t *successor_block = successor_link->block;
- printf(" ->B%d", successor_block->block_num);
+ fprintf(stderr, " ->B%d", successor_block->block_num);
}
- printf("\n");
+ fprintf(stderr, "\n");
}
}
}
}
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("\n");
+ fprintf(stderr, "\n");
}
patch_jump_targets();
+
+ /* OK, while the INTEL_DEBUG=fs above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0 && unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ disassemble(stderr, 0, next_inst_offset);
+ }
}
const unsigned *