From: Paul Berry Date: Tue, 3 Sep 2013 19:30:06 +0000 (-0700) Subject: i965/vec4: Generate URB writes using a loop. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=784044c206efd774ce1f7a481311480f85446887;p=mesa.git i965/vec4: Generate URB writes using a loop. Previously we only ever did 1 or 2 URB writes, since the maximum number of varyings we support is small enough to fit in 2 URB writes. But GL 3.2 requires the geometry shader to support 128 output varying components, and this could require up to 3 URB writes. Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 304636a8cf9..874e6e3f385 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2694,47 +2694,37 @@ vec4_visitor::emit_vertex() emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4); } - /* Set up the VUE data for the first URB write */ - int slot; - for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { - emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]); - - /* If this was max_usable_mrf, we can't fit anything more into this URB - * WRITE. + /* We may need to split this up into several URB writes, so do them in a + * loop. + */ + int slot = 0; + bool complete = false; + do { + /* URB offset is in URB row increments, and each of our MRFs is half of + * one of those, since we're doing interleaved writes. */ - if (mrf > max_usable_mrf) { - slot++; - break; - } - } - - bool complete = slot >= prog_data->vue_map.num_slots; - current_annotation = "URB write"; - vec4_instruction *inst = emit_urb_write_opcode(complete); - inst->base_mrf = base_mrf; - inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + int offset = slot / 2; - /* Optional second URB write */ - if (!complete) { mrf = base_mrf + 1; - for (; slot < prog_data->vue_map.num_slots; ++slot) { - assert(mrf < max_usable_mrf); - emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]); + + /* If this was max_usable_mrf, we can't fit anything more into this + * URB WRITE. + */ + if (mrf > max_usable_mrf) { + slot++; + break; + } } + complete = slot >= prog_data->vue_map.num_slots; current_annotation = "URB write"; - inst = emit_urb_write_opcode(true /* complete */); + vec4_instruction *inst = emit_urb_write_opcode(complete); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); - /* URB destination offset. In the previous write, we got MRFs - * 2-13 minus the one header MRF, so 12 regs. URB offset is in - * URB row increments, and each of our MRFs is half of one of - * those, since we're doing interleaved writes. - */ - inst->offset += (max_usable_mrf - base_mrf) / 2; - } + inst->offset += offset; + } while(!complete); }