X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=inline;f=src%2Fgallium%2Fdrivers%2Fr300%2Fcompiler%2Fradeon_pair_schedule.c;h=df54b084de18e2038ba21a7455ebdf047d8faefd;hb=60a27ad122128145d28be37e9c0b0bc86a8e5181;hp=25cd52c9cd41292c14f8abdbcc04527de4a995c7;hpb=f8e6d19f3f40931be741b44d3edf210c38e13f0f;p=mesa.git diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c index 25cd52c9cd4..df54b084de1 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -32,7 +32,10 @@ #include "radeon_compiler.h" #include "radeon_compiler_util.h" #include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_variable.h" +#include "util/u_debug.h" #define VERBOSE 0 @@ -60,6 +63,22 @@ struct schedule_instruction { * "all readers"), even those outside the basic block this instruction * lives in. */ struct rc_reader_data GlobalReaders; + + /** If the scheduler has paired an RGB and an Alpha instruction together, + * PairedInst references the alpha instruction's dependency information. + */ + struct schedule_instruction * PairedInst; + + /** This scheduler uses the value of Score to determine which + * instruction to schedule. Instructions with a higher value of Score + * will be scheduled first. */ + int Score; + + /** The number of components that read from a TEX instruction. */ + unsigned TexReadCount; + + /** For TEX instructions a list of readers */ + struct rc_list * TexReaders; }; @@ -113,6 +132,9 @@ struct remap_reg { struct schedule_state { struct radeon_compiler * C; struct schedule_instruction * Current; + /** Array of the previous writers of Current's destination register + * indexed by channel.
*/ + struct schedule_instruction * PrevWriter[4]; struct register_state Temporary[RC_REGISTER_MAX_INDEX]; @@ -126,6 +148,13 @@ struct schedule_state { struct schedule_instruction *ReadyAlpha; struct schedule_instruction *ReadyTEX; /*@}*/ + struct rc_list *PendingTEX; + + void (*CalcScore)(struct schedule_instruction *); + long max_tex_group; + unsigned PrevBlockHasTex:1; + unsigned TEXCount; + unsigned Opt:1; }; static struct reg_value ** get_reg_valuep(struct schedule_state * s, @@ -142,23 +171,76 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } +static unsigned get_tex_read_count(struct schedule_instruction * sinst) +{ + unsigned tex_read_count = sinst->TexReadCount; + if (sinst->PairedInst) { + tex_read_count += sinst->PairedInst->TexReadCount; + } + return tex_read_count; +} + +#if VERBOSE +static void print_list(struct schedule_instruction * sinst) +{ + struct schedule_instruction * ptr; + for (ptr = sinst; ptr; ptr=ptr->NextReady) { + unsigned tex_read_count = get_tex_read_count(ptr); + unsigned score = sinst->Score; + fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score, + tex_read_count); + } + fprintf(stderr, "\n"); +} +#endif + +static void remove_inst_from_list(struct schedule_instruction ** list, + struct schedule_instruction * inst) +{ + struct schedule_instruction * prev = NULL; + struct schedule_instruction * list_ptr; + for (list_ptr = *list; list_ptr; prev = list_ptr, + list_ptr = list_ptr->NextReady) { + if (list_ptr == inst) { + if (prev) { + prev->NextReady = inst->NextReady; + } else { + *list = inst->NextReady; + } + inst->NextReady = NULL; + break; + } + } +} + static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { inst->NextReady = *list; *list = inst; } -static void add_inst_to_list_end(struct schedule_instruction ** list, +static void add_inst_to_list_score(struct schedule_instruction ** list, struct 
schedule_instruction * inst) { - if(!*list){ + struct schedule_instruction * temp; + struct schedule_instruction * prev; + if (!*list) { *list = inst; - }else{ - struct schedule_instruction * temp = *list; - while(temp->NextReady){ - temp = temp->NextReady; - } - temp->NextReady = inst; + return; + } + temp = *list; + prev = NULL; + while(temp && inst->Score <= temp->Score) { + prev = temp; + temp = temp->NextReady; + } + + if (!prev) { + inst->NextReady = temp; + *list = inst; + } else { + prev->NextReady = inst; + inst->NextReady = temp; } } @@ -169,13 +251,13 @@ static void instruction_ready(struct schedule_state * s, struct schedule_instruc /* Adding Ready TEX instructions to the end of the "Ready List" helps * us emit TEX instructions in blocks without losing our place. */ if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) - add_inst_to_list_end(&s->ReadyTEX, sinst); + add_inst_to_list_score(&s->ReadyTEX, sinst); else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) - add_inst_to_list(&s->ReadyRGB, sinst); + add_inst_to_list_score(&s->ReadyRGB, sinst); else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) - add_inst_to_list(&s->ReadyAlpha, sinst); + add_inst_to_list_score(&s->ReadyAlpha, sinst); else - add_inst_to_list(&s->ReadyFullALU, sinst); + add_inst_to_list_score(&s->ReadyFullALU, sinst); } static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) @@ -186,6 +268,99 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins instruction_ready(s, sinst); } +/* These functions provide different heuristics for scheduling instructions. + * The default is calc_score_readers. 
*/ + +#if 0 + +static void calc_score_zero(struct schedule_instruction * sinst) +{ + sinst->Score = 0; +} + +static void calc_score_deps(struct schedule_instruction * sinst) +{ + int i; + sinst->Score = 0; + for (i = 0; i < sinst->NumWriteValues; i++) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + struct reg_value_reader * r; + for (r = v->Readers; r; r = r->Next) { + if (r->Reader->NumDependencies == 1) { + sinst->Score += 100; + } + sinst->Score += r->Reader->NumDependencies; + } + } + } +} + +#endif + +#define NO_OUTPUT_SCORE (1 << 24) + +static void score_no_output(struct schedule_instruction * sinst) +{ + assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL); + if (!sinst->Instruction->U.P.RGB.OutputWriteMask && + !sinst->Instruction->U.P.Alpha.OutputWriteMask) { + if (sinst->PairedInst) { + if (!sinst->PairedInst->Instruction->U.P. + RGB.OutputWriteMask + && !sinst->PairedInst->Instruction->U.P. + Alpha.OutputWriteMask) { + sinst->Score |= NO_OUTPUT_SCORE; + } + + } else { + sinst->Score |= NO_OUTPUT_SCORE; + } + } +} + +#define PAIRED_SCORE (1 << 16) + +static void calc_score_r300(struct schedule_instruction * sinst) +{ + unsigned src_idx; + + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { + sinst->Score = 0; + return; + } + + score_no_output(sinst); + + if (sinst->PairedInst) { + sinst->Score |= PAIRED_SCORE; + return; + } + + for (src_idx = 0; src_idx < 4; src_idx++) { + sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used + + sinst->Instruction->U.P.Alpha.Src[src_idx].Used; + } +} + +#define NO_READ_TEX_SCORE (1 << 16) + +static void calc_score_readers(struct schedule_instruction * sinst) +{ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { + sinst->Score = 0; + } else { + sinst->Score = sinst->NumReadValues; + if (sinst->PairedInst) { + sinst->Score += sinst->PairedInst->NumReadValues; + } + if (get_tex_read_count(sinst) == 0) { + sinst->Score |= NO_READ_TEX_SCORE; + } + score_no_output(sinst); 
+ } +} + /** * This function decreases the dependencies of the next instruction that * wants to write to each of sinst's read values. @@ -198,10 +373,14 @@ static void commit_update_reads(struct schedule_state * s, assert(v->NumReaders > 0); v->NumReaders--; if (!v->NumReaders) { - if (v->Next) + if (v->Next) { decrease_dependencies(s, v->Next->Writer); + } } } + if (sinst->PairedInst) { + commit_update_reads(s, sinst->PairedInst); + } } static void commit_update_writes(struct schedule_state * s, @@ -224,15 +403,38 @@ static void commit_update_writes(struct schedule_state * s, decrease_dependencies(s, v->Next->Writer); } } + if (sinst->PairedInst) { + commit_update_writes(s, sinst->PairedInst); + } +} + +static void notify_sem_wait(struct schedule_state *s) +{ + struct rc_list * pend_ptr; + for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { + struct rc_list * read_ptr; + struct schedule_instruction * pending = pend_ptr->Item; + for (read_ptr = pending->TexReaders; read_ptr; + read_ptr = read_ptr->Next) { + struct schedule_instruction * reader = read_ptr->Item; + reader->TexReadCount--; + } + } + s->PendingTEX = NULL; } static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) { - DBG("%i: commit\n", sinst->Instruction->IP); + DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); commit_update_reads(s, sinst); commit_update_writes(s, sinst); + + if (get_tex_read_count(sinst) > 0) { + sinst->Instruction->U.P.SemWait = 1; + notify_sem_wait(s); + } } /** @@ -247,6 +449,7 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo struct rc_instruction * inst_begin; assert(s->ReadyTEX); + notify_sem_wait(s); /* Node marker for R300 */ inst_begin = rc_insert_new_instruction(s->C, before->Prev); @@ -278,6 +481,12 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo while(readytex){ DBG("%i: commit TEX writes\n", readytex->Instruction->IP); 
commit_update_writes(s, readytex); + /* Set semaphore bits for last TEX instruction in the block */ + if (!readytex->NextReady) { + readytex->Instruction->U.I.TexSemAcquire = 1; + readytex->Instruction->U.I.TexSemWait = 1; + } + rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); readytex = readytex->NextReady; } } @@ -450,6 +659,7 @@ static int destructive_merge_instructions( rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; rgb->Alpha.Saturate = alpha->Alpha.Saturate; + rgb->Alpha.Omod = alpha->Alpha.Omod; /* Merge ALU result writing */ if (alpha->WriteALUResult) { @@ -460,6 +670,9 @@ static int destructive_merge_instructions( rgb->ALUResultCompare = alpha->ALUResultCompare; } + /* Copy SemWait */ + rgb->SemWait |= alpha->SemWait; + return 1; } @@ -479,6 +692,14 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { return 0; } + + /* Writing output registers in the middle of shaders is slow, so + * we don't want to pair output writes with temp writes. */ + if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) + || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) { + return 0; + } + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); if (destructive_merge_instructions(rgb, alpha)) @@ -594,7 +815,7 @@ static void is_rgb_to_alpha_possible( struct rc_pair_instruction_arg * arg, struct rc_pair_instruction_source * src) { - unsigned int chan_count = 0; + unsigned int read_chan = RC_SWIZZLE_UNUSED; unsigned int alpha_sources = 0; unsigned int i; struct rc_reader_data * reader_data = userdata; @@ -616,8 +837,9 @@ static void is_rgb_to_alpha_possible( return; } - /* Make sure the source only reads from one component. - * XXX We should allow the source to read from the same component twice. 
+ /* Make sure the source only reads the register component that we + * are going to be converting from. It is OK if the instruction uses + * this component more than once. * XXX If the index we will be converting to is the same as the * current index, then it is OK to read from more than one component. */ @@ -628,16 +850,17 @@ static void is_rgb_to_alpha_possible( case RC_SWIZZLE_Y: case RC_SWIZZLE_Z: case RC_SWIZZLE_W: - chan_count++; + if (read_chan == RC_SWIZZLE_UNUSED) { + read_chan = swz; + } else if (read_chan != swz) { + reader_data->Abort = 1; + return; + } break; default: break; } } - if (chan_count > 1) { - reader_data->Abort = 1; - return; - } /* Make sure there are enough alpha sources. * XXX If we know what register all the readers are going @@ -707,15 +930,23 @@ static int convert_rgb_to_alpha( return 0; } - pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA + * as the RGB opcode, then the Alpha instruction will already contain + * the correct opcode and instruction args, so we do not want to + * overwrite them.
+ */ + if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) { + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + } pair_inst->Alpha.DestIndex = new_index; pair_inst->Alpha.WriteMask = RC_MASK_W; pair_inst->Alpha.Target = pair_inst->RGB.Target; pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; - memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, - sizeof(pair_inst->Alpha.Arg)); + pair_inst->Alpha.Omod = pair_inst->RGB.Omod; /* Move the swizzles into the first chan */ for (i = 0; i < info->NumSrcRegs; i++) { unsigned int j; @@ -745,91 +976,170 @@ static int convert_rgb_to_alpha( return 1; } -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. 
- */ -static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +static void try_convert_and_pair( + struct schedule_state *s, + struct schedule_instruction ** inst_list) { - struct schedule_instruction * sinst; + struct schedule_instruction * list_ptr = *inst_list; + while (list_ptr && *inst_list && (*inst_list)->NextReady) { + int paired = 0; + if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP + && list_ptr->Instruction->U.P.RGB.Opcode + != RC_OPCODE_REPL_ALPHA) { + goto next; + } + if (list_ptr->NumWriteValues == 1 + && convert_rgb_to_alpha(s, list_ptr)) { + + struct schedule_instruction * pair_ptr; + remove_inst_from_list(inst_list, list_ptr); + add_inst_to_list_score(&s->ReadyAlpha, list_ptr); + + for (pair_ptr = s->ReadyRGB; pair_ptr; + pair_ptr = pair_ptr->NextReady) { + if (merge_instructions(&pair_ptr->Instruction->U.P, + &list_ptr->Instruction->U.P)) { + remove_inst_from_list(&s->ReadyAlpha, list_ptr); + remove_inst_from_list(&s->ReadyRGB, pair_ptr); + pair_ptr->PairedInst = list_ptr; + + add_inst_to_list(&s->ReadyFullALU, pair_ptr); + list_ptr = *inst_list; + paired = 1; + break; + } - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - rc_insert_instruction(before->Prev, sinst->Instruction); - commit_alu_instruction(s, sinst); - } else { - struct schedule_instruction **prgb; - struct schedule_instruction **palpha; - struct schedule_instruction *prev; -pair: - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - struct schedule_instruction * psirgb = *prgb; - struct schedule_instruction * psialpha = *palpha; - - if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) - continue; - - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - 
rc_insert_instruction(before->Prev, psirgb->Instruction); - commit_alu_instruction(s, psirgb); - commit_alu_instruction(s, psialpha); - goto success; } } - prev = NULL; - /* No success in pairing, now try to convert one of the RGB - * instructions to an Alpha so we can pair it with another RGB. - */ - if (s->ReadyRGB && s->ReadyRGB->NextReady) { - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - if ((*prgb)->NumWriteValues == 1) { - struct schedule_instruction * prgb_next; - if (!convert_rgb_to_alpha(s, *prgb)) - goto cont_loop; - prgb_next = (*prgb)->NextReady; - /* Add instruction to the Alpha ready list. */ - (*prgb)->NextReady = s->ReadyAlpha; - s->ReadyAlpha = *prgb; - /* Remove instruction from the RGB ready list.*/ - if (prev) - prev->NextReady = prgb_next; - else - s->ReadyRGB = prgb_next; - goto pair; + if (!paired) { +next: + list_ptr = list_ptr->NextReady; + } + } +} + +/** + * This function attempts to merge RGB and Alpha instructions together. + */ +static void pair_instructions(struct schedule_state * s) +{ + struct schedule_instruction *rgb_ptr; + struct schedule_instruction *alpha_ptr; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + rgb_ptr = s->ReadyRGB; + while(rgb_ptr) { + struct schedule_instruction * rgb_next = rgb_ptr->NextReady; + alpha_ptr = s->ReadyAlpha; + while(alpha_ptr) { + struct schedule_instruction * alpha_next = alpha_ptr->NextReady; + if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) { + /* Remove RGB and Alpha from their ready lists. + */ + remove_inst_from_list(&s->ReadyRGB, rgb_ptr); + remove_inst_from_list(&s->ReadyAlpha, alpha_ptr); + rgb_ptr->PairedInst = alpha_ptr; + add_inst_to_list(&s->ReadyFullALU, rgb_ptr); + break; } -cont_loop: - prev = *prgb; - } - } - /* Still no success in pairing, just take the first RGB - * or alpha instruction. 
*/ - if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else if (s->ReadyAlpha) { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } else { - /*XXX Something real bad has happened. */ - assert(0); + alpha_ptr = alpha_next; + } + rgb_ptr = rgb_next; + } + + if (!s->Opt) { + return; + } + + /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB + * slot can be converted into Alpha instructions. */ + try_convert_and_pair(s, &s->ReadyFullALU); + + /* Try to convert some of the RGB instructions to Alpha and + * try to pair it with another RGB. */ + try_convert_and_pair(s, &s->ReadyRGB); +} + +static void update_max_score( + struct schedule_state * s, + struct schedule_instruction ** list, + int * max_score, + struct schedule_instruction ** max_inst_out, + struct schedule_instruction *** list_out) +{ + struct schedule_instruction * list_ptr; + for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { + int score; + s->CalcScore(list_ptr); + score = list_ptr->Score; + if (!*max_inst_out || score > *max_score) { + *max_score = score; + *max_inst_out = list_ptr; + *list_out = list; + } + } +} + +static void emit_instruction( + struct schedule_state * s, + struct rc_instruction * before) +{ + int max_score = -1; + struct schedule_instruction * max_inst = NULL; + struct schedule_instruction ** max_list = NULL; + unsigned tex_count = 0; + struct schedule_instruction * tex_ptr; + + pair_instructions(s); +#if VERBOSE + fprintf(stderr, "Full:\n"); + print_list(s->ReadyFullALU); + fprintf(stderr, "RGB:\n"); + print_list(s->ReadyRGB); + fprintf(stderr, "Alpha:\n"); + print_list(s->ReadyAlpha); + fprintf(stderr, "TEX:\n"); + print_list(s->ReadyTEX); +#endif + + for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { + if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { + emit_all_tex(s, before); + return; } + tex_count++; + } + update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, 
&max_list); + update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); + update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); + + if (tex_count >= s->max_tex_group || max_score == -1 + || (s->TEXCount > 0 && tex_count == s->TEXCount) + || (!s->C->is_r500 && tex_count > 0 && max_score == -1)) { + emit_all_tex(s, before); + } else { + + + remove_inst_from_list(max_list, max_inst); + rc_insert_instruction(before->Prev, max_inst->Instruction); + commit_alu_instruction(s, max_inst); - rc_insert_instruction(before->Prev, sinst->Instruction); - commit_alu_instruction(s, sinst); - success: ; + presub_nop(before->Prev); + } +} + +static void add_tex_reader( + struct schedule_state * s, + struct schedule_instruction * writer, + struct schedule_instruction * reader) +{ + if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) { + /* Not a TEX instruction */ + return; } - /* If the instruction we just emitted uses a presubtract value, and - * the presubtract sources were written by the previous intstruction, - * the previous instruction needs a nop. */ - presub_nop(before->Prev); + reader->TexReadCount++; + rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader)); } static void scan_read(void * data, struct rc_instruction * inst, @@ -845,7 +1155,22 @@ static void scan_read(void * data, struct rc_instruction * inst, if (*v && (*v)->Writer == s->Current) { /* The instruction reads and writes to a register component. * In this case, we only want to increment dependencies by one. + * Why? + * Because each instruction depends on the writers of its source + * registers _and_ the most recent writer of its destination + * register. In this case, the current instruction (s->Current) + * has a dependency that both writes to one of its source + * registers and was the most recent writer to its destination + * register. We have already marked this dependency in + * scan_write(), so we don't need to do it again.
+ */ + + /* We need to make sure we are adding s->Current to the + * previous writer's list of TexReaders, if the previous writer + * was a TEX instruction. */ + add_tex_reader(s, s->PrevWriter[chan], s->Current); + return; } @@ -866,6 +1191,7 @@ static void scan_read(void * data, struct rc_instruction * inst, /* Only update the current instruction's dependencies if the * register it reads from has been written to in this block. */ if ((*v)->Writer) { + add_tex_reader(s, (*v)->Writer, s->Current); s->Current->NumDependencies++; } } @@ -898,6 +1224,9 @@ static void scan_write(void * data, struct rc_instruction * inst, if (*pv) { (*pv)->Next = newv; s->Current->NumDependencies++; + /* Keep track of the previous writer to s->Current's destination + * register */ + s->PrevWriter[chan] = (*pv)->Writer; } *pv = newv; @@ -919,22 +1248,33 @@ static void is_rgb_to_alpha_possible_normal( } -static void schedule_block(struct r300_fragment_program_compiler * c, +static void schedule_block(struct schedule_state * s, struct rc_instruction * begin, struct rc_instruction * end) { - struct schedule_state s; unsigned int ip; - memset(&s, 0, sizeof(s)); - s.C = &c->Base; - /* Scan instructions for data dependencies */ ip = 0; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); - memset(s.Current, 0, sizeof(struct schedule_instruction)); + s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); + memset(s->Current, 0, sizeof(struct schedule_instruction)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->HasTexture) { + s->TEXCount++; + } + } - s.Current->Instruction = inst; + /* XXX: This causes SemWait to be set for all instructions in + * a block if the previous block contained a TEX instruction. + * We can do better here, but it will take a lot of work. 
*/ + if (s->PrevBlockHasTex) { + s->Current->TexReadCount = 1; + } + + s->Current->Instruction = inst; inst->IP = ip++; DBG("%i: Scanning\n", inst->IP); @@ -943,17 +1283,18 @@ static void schedule_block(struct r300_fragment_program_compiler * c, * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ - rc_for_all_writes_chan(inst, &scan_write, &s); - rc_for_all_reads_chan(inst, &scan_read, &s); + rc_for_all_writes_chan(inst, &scan_write, s); + rc_for_all_reads_chan(inst, &scan_read, s); - DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); - if (!s.Current->NumDependencies) - instruction_ready(&s, s.Current); + if (!s->Current->NumDependencies) { + instruction_ready(s, s->Current); + } /* Get global readers for possible RGB->Alpha conversion. */ - s.Current->GlobalReaders.ExitOnAbort = 1; - rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + s->Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s->C, inst, &s->Current->GlobalReaders, is_rgb_to_alpha_possible_normal, is_rgb_to_alpha_possible, NULL); } @@ -963,13 +1304,9 @@ static void schedule_block(struct r300_fragment_program_compiler * c, end->Prev = begin->Prev; /* Schedule instructions back */ - while(!s.C->Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s, end); - - while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) - emit_one_alu(&s, end); + while(!s->C->Error && + (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { + emit_instruction(s, end); } } @@ -984,13 +1321,20 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { - struct schedule_state s; - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct schedule_state s; struct rc_instruction * 
inst = c->Base.Program.Instructions.Next; + unsigned int * opt = user; memset(&s, 0, sizeof(s)); + s.Opt = *opt; s.C = &c->Base; + if (s.C->is_r500) { + s.CalcScore = calc_score_readers; + } else { + s.CalcScore = calc_score_r300; + } + s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; @@ -1005,6 +1349,11 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user) inst = inst->Next; DBG("Schedule one block\n"); - schedule_block(c, first, inst); + memset(s.Temporary, 0, sizeof(s.Temporary)); + s.TEXCount = 0; + schedule_block(&s, first, inst); + if (s.PendingTEX) { + s.PrevBlockHasTex = 1; + } } }