glsl/lower_ubo_reference: split struct copies into element copies
author    Iago Toral Quiroga <itoral@igalia.com>
          Mon, 23 Nov 2015 12:53:09 +0000 (13:53 +0100)
committer Iago Toral Quiroga <itoral@igalia.com>
          Tue, 1 Dec 2015 12:30:42 +0000 (13:30 +0100)
Improves register pressure, since otherwise we end up emitting
loads for all the elements in the RHS and then emitting
stores for all the elements in the LHS.
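
For illustration (the block and field names here are made up), a record
copy such as

    dst = ssbo.s;

where `s` is backed by a buffer object is now split into per-field
copies along the lines of

    dst.a = ssbo.s.a;
    dst.b = ssbo.s.b;

so each load can be consumed by its matching store before the next
field is read, instead of keeping every loaded field live at once.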

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/glsl/lower_ubo_reference.cpp

index b82d800eefb5f18d1f64e3b62851de50e12e5153..8ec83465b69068372f4209022d8902fcd0d0db49 100644
@@ -155,6 +155,7 @@ public:
                       ir_rvalue *offset);
 
    bool check_for_buffer_array_copy(ir_assignment *ir);
+   bool check_for_buffer_struct_copy(ir_assignment *ir);
    void check_for_ssbo_store(ir_assignment *ir);
    void write_to_memory(ir_dereference *deref,
                         ir_variable *var,
@@ -1190,12 +1191,60 @@ lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
    return true;
 }
 
+bool
+lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
+{
+   if (!ir || !ir->lhs || !ir->rhs)
+      return false;
+
+   /* LHS and RHS must be records */
+   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
+      return false;
+
+   /* RHS must be a buffer-backed variable. This is what can cause the problem
+    * since it would lead to a series of loads that need to live until we
+    * see the writes to the LHS.
+    */
+   ir_variable *rhs_var = ir->rhs->variable_referenced();
+   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
+      return false;
+
+   /* Split the struct copy into individual element copies to reduce
+    * register pressure
+    */
+   ir_dereference *rhs_deref = ir->rhs->as_dereference();
+   if (!rhs_deref)
+      return false;
+
+   ir_dereference *lhs_deref = ir->lhs->as_dereference();
+   if (!lhs_deref)
+      return false;
+
+   assert(lhs_deref->type->record_compare(rhs_deref->type));
+   mem_ctx = ralloc_parent(shader->ir);
+
+   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
+      const char *field_name = lhs_deref->type->fields.structure[i].name;
+      ir_dereference *lhs_field =
+         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
+                                            field_name);
+      ir_dereference *rhs_field =
+         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
+                                            field_name);
+      ir->insert_after(assign(lhs_field, rhs_field));
+   }
+
+   ir->remove();
+   progress = true;
+   return true;
+}
+
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
-   /* Array copies could involve large amounts of load/store
+   /* Array and struct copies could involve large amounts of load/store
     * operations. To improve register pressure we want to special-case
-    * these and split array copies into individual element copies.
+    * these and split them into individual element copies.
     * This way we avoid emitting all the loads for the RHS first and
     * all the writes for the LHS second and register usage is more
     * efficient.
@@ -1203,6 +1252,9 @@ lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
    if (check_for_buffer_array_copy(ir))
       return visit_continue_with_parent;
 
+   if (check_for_buffer_struct_copy(ir))
+      return visit_continue_with_parent;
+
    check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_store(ir);
    return rvalue_visit(ir);
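
For reference, a minimal GLSL compute shader (hypothetical; names and
layout are illustrative only, not taken from the patch) that exercises
the new path: the whole-struct read from the SSBO below is now lowered
into one load/store pair per field instead of all loads followed by all
stores.

    #version 430
    layout(local_size_x = 1) in;

    struct Light {
       vec4 position;
       vec4 color;
    };

    layout(std430, binding = 0) buffer Lights {
       Light light;
    } lights;

    layout(std430, binding = 1) buffer Out {
       vec4 result;
    };

    void main()
    {
       /* Whole-struct copy from buffer-backed storage: split by the pass. */
       Light l = lights.light;
       result = l.position + l.color;
    }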