Introduce .editorconfig
[mesa.git] / src / gallium / drivers / r600 / r600_asm.c
index cf18f6db907333ed20e9d1f699fd200853ba877a..f85993d45118146be388b217df23dbd43952fe41 100644 (file)
@@ -27,6 +27,7 @@
 #include "r600d.h"
 
 #include <errno.h>
+#include "util/u_bitcast.h"
 #include "util/u_dump.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
@@ -61,6 +62,7 @@ static struct r600_bytecode_cf *r600_bytecode_cf(void)
        LIST_INITHEAD(&cf->alu);
        LIST_INITHEAD(&cf->vtx);
        LIST_INITHEAD(&cf->tex);
+       LIST_INITHEAD(&cf->gds);
        return cf;
 }
 
@@ -94,6 +96,16 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
        return tex;
 }
 
+static struct r600_bytecode_gds *r600_bytecode_gds(void) /* Allocate one GDS instruction node; returns NULL when the allocation fails. */
+{
+       struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds);
+
+       if (gds == NULL)
+               return NULL;
+       LIST_INITHEAD(&gds->list); /* initialise the node's own list link before handing it to the caller */
+       return gds;
+}
+
 static unsigned stack_entry_size(enum radeon_family chip) {
        /* Wavefront size:
         *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
@@ -226,7 +238,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
 /* alu instructions that can ony exits once per group */
 static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
-       return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED);
+       return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
 }
 
 static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
@@ -257,6 +269,24 @@ static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
        return 0;
 }
 
+static int is_lds_read(int sel) /* Nonzero when sel is one of the two LDS output-queue "pop" source selectors (A or B). */
+{
+  return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
+}
+
+static int alu_uses_lds(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) /* Returns 1 if any source operand of alu is an LDS queue-pop selector, else 0. */
+{
+       unsigned num_src = r600_bytecode_get_num_operands(bc, alu); /* only the operands this instruction actually has are inspected */
+       unsigned src;
+
+       for (src = 0; src < num_src; ++src) {
+               if (is_lds_read(alu->src[src].sel)) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
 static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
        const struct alu_op_info *op = r600_isa_alu(alu->op);
@@ -776,6 +806,8 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu
                                }
                                have_rel = 1;
                        }
+                       if (alu_uses_lds(bc, prev[i]))
+                               return 0;
 
                        num_once_inst += is_alu_once_inst(bc, prev[i]);
                }
@@ -789,7 +821,7 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu
                } else if (prev[i] && slots[i]) {
                        if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
                                /* Trans unit is still free try to use it. */
-                               if (is_alu_any_unit_inst(bc, slots[i])) {
+                               if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(bc, slots[i])) {
                                        result[i] = prev[i];
                                        result[4] = slots[i];
                                } else if (is_alu_any_unit_inst(bc, prev[i])) {
@@ -1412,6 +1444,33 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t
        return 0;
 }
 
+int r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds) /* Append a copy of *gds to the bytecode; returns 0, -ENOMEM, or the error from r600_bytecode_add_cf(). */
+{
+       struct r600_bytecode_gds *ngds = r600_bytecode_gds();
+       int r;
+
+       if (ngds == NULL)
+               return -ENOMEM;
+       memcpy(ngds, gds, sizeof(struct r600_bytecode_gds)); /* whole-struct copy: this also overwrites the list link set up by the allocator; it is linked again below */
+
+       if (bc->cf_last == NULL || /* no CF yet, ... */
+           bc->cf_last->op != CF_OP_GDS || /* ... or the current CF is not a GDS clause, ... */
+           bc->force_add_cf) { /* ... or a new CF was explicitly requested: open a fresh one */
+               r = r600_bytecode_add_cf(bc);
+               if (r) {
+                       free(ngds); /* node was never linked into any list, so release it here */
+                       return r;
+               }
+               bc->cf_last->op = CF_OP_GDS;
+       }
+
+       LIST_ADDTAIL(&ngds->list, &bc->cf_last->gds);
+       bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
+       if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) /* instruction count reached the limit reported by the helper */
+               bc->force_add_cf = 1; /* makes the next call to this function start a new CF (see the condition above) */
+       return 0;
+}
+
 int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
 {
        int r;
@@ -1623,6 +1682,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
        struct r600_bytecode_alu *alu;
        struct r600_bytecode_vtx *vtx;
        struct r600_bytecode_tex *tex;
+       struct r600_bytecode_gds *gds;
        uint32_t literal[4];
        unsigned nliteral;
        unsigned addr;
@@ -1631,7 +1691,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
        if (!bc->nstack) // If not 0, Stack_size already provided by llvm
                bc->nstack = bc->stack.max_entries;
 
-       if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
+       if ((bc->type == PIPE_SHADER_VERTEX || bc->type == PIPE_SHADER_TESS_EVAL || bc->type == PIPE_SHADER_TESS_CTRL) && !bc->nstack) {
                bc->nstack = 1;
        }
 
@@ -1675,10 +1735,12 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                                        r = r600_bytecode_alu_build(bc, alu, addr);
                                        break;
                                case R700:
-                               case EVERGREEN: /* eg alu is same encoding as r700 */
-                               case CAYMAN:
                                        r = r700_bytecode_alu_build(bc, alu, addr);
                                        break;
+                               case EVERGREEN:
+                               case CAYMAN:
+                                       r = eg_bytecode_alu_build(bc, alu, addr);
+                                       break;
                                default:
                                        R600_ERR("unknown chip class %d.\n", bc->chip_class);
                                        return -EINVAL;
@@ -1701,6 +1763,14 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                                        return r;
                                addr += 4;
                        }
+               } else if (cf->op == CF_OP_GDS) {
+                       assert(bc->chip_class >= EVERGREEN);
+                       LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
+                               r = eg_bytecode_gds_build(bc, gds, addr);
+                               if (r)
+                                       return r;
+                               addr += 4;
+                       }
                } else if (cf->op == CF_OP_TEX) {
                        LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
                                assert(bc->chip_class >= EVERGREEN);
@@ -1731,6 +1801,7 @@ void r600_bytecode_clear(struct r600_bytecode *bc)
                struct r600_bytecode_alu *alu = NULL, *next_alu;
                struct r600_bytecode_tex *tex = NULL, *next_tex;
                struct r600_bytecode_tex *vtx = NULL, *next_vtx;
+               struct r600_bytecode_gds *gds = NULL, *next_gds;
 
                LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
                        free(alu);
@@ -1750,6 +1821,12 @@ void r600_bytecode_clear(struct r600_bytecode *bc)
 
                LIST_INITHEAD(&cf->vtx);
 
+               LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
+                       free(gds);
+               }
+
+               LIST_INITHEAD(&cf->gds);
+
                free(cf);
        }
 
@@ -1850,6 +1927,28 @@ static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
                need_sel = 0;
                need_chan = 0;
                switch (sel) {
+               case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
+                       o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
+                       break;
+               case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
+                       o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
+                       break;
+               case EG_V_SQ_ALU_SRC_LDS_OQ_A:
+                       o += fprintf(stderr, "LDS_OQ_A");
+                       need_chan = 1;
+                       break;
+               case EG_V_SQ_ALU_SRC_LDS_OQ_B:
+                       o += fprintf(stderr, "LDS_OQ_B");
+                       need_chan = 1;
+                       break;
+               case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
+                       o += fprintf(stderr, "LDS_OQ_A_POP");
+                       need_chan = 1;
+                       break;
+               case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
+                       o += fprintf(stderr, "LDS_OQ_B_POP");
+                       need_chan = 1;
+                       break;
                case V_SQ_ALU_SRC_PS:
                        o += fprintf(stderr, "PS");
                        break;
@@ -1858,7 +1957,7 @@ static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
                        need_chan = 1;
                        break;
                case V_SQ_ALU_SRC_LITERAL:
-                       o += fprintf(stderr, "[0x%08X %f]", src->value, *(float*)&src->value);
+                       o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value));
                        break;
                case V_SQ_ALU_SRC_0_5:
                        o += fprintf(stderr, "0.5");
@@ -1911,6 +2010,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
        struct r600_bytecode_alu *alu = NULL;
        struct r600_bytecode_vtx *vtx = NULL;
        struct r600_bytecode_tex *tex = NULL;
+       struct r600_bytecode_gds *gds = NULL;
 
        unsigned i, id, ngr = 0, last;
        uint32_t literal[4];
@@ -2194,6 +2294,33 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 
                        id += 4;
                }
+
+               LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
+                       int o = 0;
+                       o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
+                                       bc->bytecode[id + 1], bc->bytecode[id + 2]);
+
+                       o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);
+
+                       if (gds->op != FETCH_OP_TF_WRITE) {
+                               o += fprintf(stderr, "R%d.", gds->dst_gpr);
+                               o += print_swizzle(gds->dst_sel_x);
+                               o += print_swizzle(gds->dst_sel_y);
+                               o += print_swizzle(gds->dst_sel_z);
+                               o += print_swizzle(gds->dst_sel_w);
+                       }
+
+                       o += fprintf(stderr, ", R%d.", gds->src_gpr);
+                       o += print_swizzle(gds->src_sel_x);
+                       o += print_swizzle(gds->src_sel_y);
+                       o += print_swizzle(gds->src_sel_z);
+
+                       if (gds->op != FETCH_OP_TF_WRITE) {
+                               o += fprintf(stderr, ", R%d.", gds->src_gpr2);
+                       }
+                       fprintf(stderr, "\n");
+                       id += 4;
+               }
        }
 
        fprintf(stderr, "--------------------------------------\n");
@@ -2217,6 +2344,12 @@ void r600_vertex_data_type(enum pipe_format pformat,
                return;
        }
 
+       if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
+               *format = FMT_5_6_5;
+               *endian = r600_endian_swap(16);
+               return;
+       }
+
        desc = util_format_description(pformat);
        if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
                goto out_unknown;
@@ -2489,7 +2622,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
                return NULL;
        }
 
-       u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, &shader->offset,
+       u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
+                            &shader->offset,
                             (struct pipe_resource**)&shader->buffer);
        if (!shader->buffer) {
                r600_bytecode_clear(&bc);
@@ -2507,7 +2641,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
        } else {
                memcpy(bytecode, bc.bytecode, fs_size);
        }
-       rctx->b.ws->buffer_unmap(shader->buffer->cs_buf);
+       rctx->b.ws->buffer_unmap(shader->buffer->buf);
 
        r600_bytecode_clear(&bc);
        return shader;