/******************************************
 Simple Pipeline Processor (SP/1) V1.01
 Synthesizable SFL source code.
 (C)Copyright by Naohiko Shimizu, 1998,2005.
 All rights are reserved.

 Contact information:
   Dr. Naohiko Shimizu
   IP ARCH, Inc.
   Email: nshimizu@ip-arch.jp
   URL: http://www.ip-arch.jp/

 The above URL is the primary distribution site for SP/1.
 You can get latest copy of sp1.sfl and patch information (if available).

 You can use full or part of this file for your project, if and only if
 the copyright notice is preserved on the every file and the every product
 which uses SP/1 or derivatives.

 You can convert the code of this processor to verilog or vhdl with sfl2vl.
 See the URL: http://www.ip-arch.jp/indexe.html

 Features of the processor:
   Data 8bit, Instruction 16bit, Harvard architecture,
   8bit data/inst address spaces, load/store architecture.
   It has two types of instruction, i.e. R-type and I-type.

 R-type instructions
   |OP|r2|r3|r1| - | ... r1 <- r2 op r3
   The instructions belonging to the R-type are:
     ADD  addition
     AND  logical and
     SLT  set less than (like MIPS architecture)
     NOT  logical not
     SR   logical shift right by one
     RR   Internal register read   ex. rr $1,$i2
     WR   Internal register write  ex. wr $2,$i3
     RI   Return from interrupt    ex. ri

 I-type instructions
   |OP|r1|r2| I | ... r1 <- r2(I)
   The instructions belonging to the I-type are:
     LD   load from memory
     ST   store to memory
     LDA  load address (or add immediate)
     BZ   branch if r1 is zero
     BAL  branch and link
   IN and OUT instructions are reserved for future use.

 There are four internal registers, which are mapped to 0x00 through 0x03
 of the IO space:
   ir0: Controls and reports the status of interrupt and counter operations.
        <0>: interrupt mask
        <1>: counter run
        <6>: counter interrupt occurred
        <7>: external interrupt occurred
   ir1: The free-running down counter. If the counter hits 0x00 and the
        interrupt mask is 0b1, an interrupt will occur.
   ir2: The interrupt new PC.
   ir3: The interrupt old PC.

 Pipeline operations:
   5 stage pipeline IF|ID|EX|ME|WB
   A stall condition is detected during the ID stage, and the instruction
   waits for resource availability within that stage.
   There are three forwarding paths within SP/1.
   You may stall up to 1 cycle for the I type instruction result.
   There is also a simple branch prediction mechanism to minimize the
   taken branch stall.

 C to SP/1 translation sample:
   ABI:
     procedure return value    : $1
     procedure argument (1st)  : $1
     procedure return address  : $2
     frame pointer             : in memory at 'sp'
     global pointer            : in memory at 'gp'
     size of integer           : 8bit

   Consider the following C code.

     int foo(int k) {
       if(k == 0) return(0);
       return(k+foo(k-1));
     }

   You can translate this code into SP/1 assembler code like the following:

     .entry foo
     foo:   ld  $3,sp       ; load frame pointer
            lda $3,-2($3)   ; reserve two integers
            st  $3,sp       ; update frame pointer
            st  $2,0($3)    ; save return address
            st  $1,1($3)    ; save local argument 'k'
            bz  $1,foo_1    ; if 'k' is equal to 0, return
            lda $1,-1($1)   ; make 'k-1'
            bal $2,foo      ; call foo
            ld  $3,sp       ; load frame pointer
            ld  $2,1($3)    ; load local argument 'k'
            add $1,$1,$2    ; calculate 'k+return value of foo'
     foo_1: ld  $2,0($3)    ; load return address
            lda $3,2($3)    ; calculate original frame pointer
            st  $3,sp       ; update frame pointer
            lda $0,0($0)    ; make 0 for force branch
            bz  $0,0($2)    ; return from procedure
     .end

 Update information:
 03-Feb-2005: Modify signal names to fit sfl2vl.
 09-Nov-1998: Brush up the code and slightly change the architecture.
              $0 is now usable for general purpose, internal registers
              are mapped to the IO space, and interrupts are now
              level sensitive.
              These changes are fed back from sp1c.sfl.
 05-Nov-1998: Forwarding detection logic bug corrected.
              The instructions which do not produce a result
              could not update the register map.
              Branch prediction logic bug corrected.
              The BTB was checked only for the PC. It must check
              the real target address too.
 29-Oct-1998: Move many of the common operations to dedicated stages.
Reduce power on reset registers, and move resetting to the start procedure.
              These changes make the code more readable.
              Make general purpose registers as a register file.
 26-Oct-1998: Change the forwarding conditions.
              The LDA instruction result can be forwarded in the EX stage,
              but the original code waited for the MEM stage.
              Then speed up will be achieved in the contention case.
              Another change is the BTB registration condition.
              The branch target of the Return interruption (RI) instruction
              will always miss the prediction, then eliminate the BTB
              registration.
              Also C to SP/1 translation sample code was listed in the
              comment.
 22-Oct-1998: First public announcement.
******************************************/

/* Interface of the 8-bit incrementer submodule.
   Invoking do(in) drives out; the module body computes in + 0x01. */
declare inc8 {
    input in<8> ;
    output out<8> ;
    instrin do ;
    instr_arg do(in) ;
}

/* Interface of the 8-bit decrementer submodule.
   Invoking do(in) drives out; the module body computes in - 0x01. */
declare dec8 {
    input in<8> ;
    output out<8> ;
    instrin do ;
    instr_arg do(in) ;
}

/* Interface of the 8-bit adder with carry-in.
   Invoking do(cin,in1,in2) drives out; the module body computes
   in1 + in2 + cin. */
declare cla8 {
    input cin, in1<8>, in2<8> ;
    output out<8> ;
    instrin do ;
    instr_arg do(cin,in1,in2) ;
}

/* R-type instructions |OP|r2|r3|r1| - | ... r1 <- r2 op r3 */
/* Opcode values for the 4-bit OP field. Each %d macro is kept on its own
   line, with no trailing comment on the macro line. */
%d ADD 0x0
%d AND 0x1
%d RI 0x2
%d SLT 0x3
%d NOT 0x4
%d SR 0x6

/* I-type instructions |OP|r1|r2| I | ...
r1 <- r2(I) */
%d LD 0x8
%d ST 0x9
%d LDA 0xa
%d IN 0xc
%d OUT 0xd
%d BZ 0xe
%d BAL 0xf

/* Field selectors for the instruction held in the ID stage (register op). */
%d ITYPE op<15>
%d OPCODE op<15:12>
%d R2 op<11:10>
%d R3 op<9:8>
%d R1 op<7:6>
%d IRSEL op<1:0>
%d I op<7:0>

/* Field selectors for the instruction held in the EX stage (register eop). */
%d eOPCODE eop<15:12>
%d eR2 eop<11:10>
%d eR3 eop<9:8>
%d eR1 eop<7:6>
%d eI eop<7:0>
%d eITYPE eop<15>

/* Field selectors for the instruction held in the MEM stage (register mop). */
%d mOPCODE mop<15:12>
%d mR2 mop<11:10>
%d mR3 mop<9:8>
%d mR1 mop<7:6>
%d mI mop<7:0>

/* One-hot ALU operation select codes carried in alusel. */
%d ALUADD 0b00001
%d ALUSLT 0b00010
%d ALUAND 0b00100
%d ALUNOT 0b01000
%d ALUSR 0b10000

/* External interface of the SP/1 processor core. */
declare sp1 {
    input inst<16> ;
    input dti<8> ;
    output dto<8> ;
    output iadrs<8> ;
    output adrs<8> ;
    instrin extint;
    instrout inst_read;
    instrout memory_read;
    instrout memory_write;
}

/* Interface of the four-entry, 8-bit general purpose register file
   (two read ports, one write port). */
declare reg4 {
    input regin<8> ;
    input reginadr<2> ;
    input regoutadr<2> ;
    input regoutbadr<2> ;
    output regout<8> ;
    output regoutb<8> ;
    instrin read;
    instrin readb;
    instrin write;
    instr_arg read(regoutadr);
    instr_arg readb(regoutbadr);
    instr_arg write(reginadr,regin);
}

/* SP/1 processor core: five pipeline stages (iff, id, exec, mms, wrb) plus
   the always-running helper stages address, pipectl and counter, and the
   int stage that flags an external interrupt request. */
module sp1 {
    input inst<16> ;
    input dti<8> ;
    output dto<8> ;
    output iadrs<8> ;
    output adrs<8> ;
    instrin extint;
    instrself start;
    instrself targetif;
    instrout inst_read;
    instrout memory_read;
    instrout memory_write;

    reg pc<8> ;  /* program counter */
    reg tpc<8> ; /* branch target program counter */
    reg_wr st0 ; /* power-on reset capable sequence registers */
    reg st1 ;
    reg st2 ;
    reg inten, cnten, cntintflag ;
    reg ir1<8>,ir2<8>,ir3<8>;
    reg eforcenop; /* force NOP register */
    reg op<16> ;
    reg md<8> ;
    /* stage registers */
    reg dpc<8>, epc<8>, eop<16>, eop1<8>, eop2<8>;
    reg clasel1<4>, clasel2<4>, alusel<5>;
    reg mpc<8>, mop<16>, malu<8>, msrc<8>, mar<8>;
    reg wop<2>, walu<8>, xalu<8>, btb<16>;
    reg ntif, dtif<2>, etif<2>, btbv;
    reg ru1<4>, ru2<4>, ru3<4>, streq;

    inc8 inc ;
    dec8 dec ;
    cla8 cla ;
    reg4 gr;

    sel dtop1<8>, dtop2<8>, br_taken, npc<8>;
    sel dclasel1<4>, dclasel2<4>, dalusel<5>;
    sel er2f<8>, clain2<8>, aluo<8>;
    sel clain1<8>, targetpc<8>;
    sel op1chk, op2chk, stall_req;

    instr_arg memory_write(dto);
    instr_arg targetif(targetpc);

    stage_name int { task intt() ; }
    stage_name address { task addresst() ; }
    stage_name pipectl { task pipet() ; }
    stage_name counter { task countert() ; }
    stage_name iff { task ift(pc) ; }
    stage_name id { task idt(dpc,op,dtif) ; }
    stage_name exec { task ext(epc,eop,eop1,eop2,etif,clasel1,clasel2,alusel) ; }
    stage_name mms { task mmt(mpc,mop,malu,msrc) ; }
    stage_name wrb { task wbt(wop,walu) ; }

    /* Common operations for all stages must be described here */
    par {
        /* st0 -> st1 -> st2 form a shift chain; start() fires in the cycle
           where st1 is already 1 and st2 is still 0. */
        st0 := 0b1 ;
        st1 := st0;
        st2 := st1;
        any {
            (st2 == 0b0) & (st1 == 0b1): start();
        }
    }

    /* An external interrupt request activates the int stage for one cycle. */
    instruct extint generate int.intt();

    /* Redirect instruction fetch to targetpc and request squashing of the
       instructions already fetched (eforcenop). */
    instruct targetif par {
        ntif := 0b1;
        tpc := targetpc;
        eforcenop := 0b1;
    }

    instruct start par {
        /* resetting registers */
        inten := 0b0;
        cnten := 0b0;
        cntintflag := 0b0;
        btb := 0x0000;
        btbv := 0b0;
        op := 0x0000;
        eforcenop := 0b0;
        ntif := 0b0;
        dtif := 0b00;
        etif := 0b00;
        ru1 := 0x0;
        ru2 := 0x0;
        ru3 := 0x0;
        streq := 0b0;
        /* start instruction fetch */
        generate iff.ift(0x00);
        generate pipectl.pipet();
        generate address.addresst();
        generate counter.countert();
    }

    stage address {
        par {
            /* Why this statement is here?
               The adrs should be stable after the MEM stage
               for the ST instruction. */
            adrs = mar;
            any {
                /* ntif and tpc will be set in the exec stage
                   on the case of taken branch */
                ntif == 0b0: par {
                    npc = inc.do(pc).out;
                    iadrs = pc;
                }
                ntif : par {
                    npc = inc.do(tpc).out;
                    iadrs = tpc;
                }
            }
        }
    }

    /* Free-running down counter ir1; raises cntintflag when it reads 0x00
       while the counter is enabled. */
    stage counter {
        any {
            cnten: ir1 := dec.do(ir1).out;
            cnten & (ir1 == 0x00): cntintflag := 0b1;
        }
    }

    stage int {
        finish;
    }

    stage pipectl {
        par {
            /* The r2 field designates the data request for R-form
               instructions and ST and BZ instructions.
               While the r3 field designates for both R-form and
               I-form data requests.
               The data memory address bus should be stable right after
               the ST operation cycle, and it will be preserved by the
               stall request. */
            stall_req =
                (( ^ITYPE | (OPCODE == BZ ) | (OPCODE == ST ) | (OPCODE == OUT ) ) & op1chk)
              | (( (OPCODE == ADD) | (OPCODE == AND) | (OPCODE == SLT) | (ITYPE & ^(R3 == 0b00)) ) & op2chk)
              | (( (OPCODE == LD ) | (OPCODE == ST )) & streq);
            /* streq means that previous instruction was ST,
               and bus contention may be occurred */

            ru2 := ru1;
            ru3 := ru2;

            /* forwarding control logic for SP/1 */
            /* ruN layout: <3> valid, <2:1> destination register,
               <0> set when the result cannot be forwarded from EX. */
            op1chk = ( ru1<3> & ((R2 @ ru1<2:1>) == 0b00)) & ^dclasel1<1>;
            op2chk = ( ru1<3> & ((R3 @ ru1<2:1>) == 0b00)) & ^dclasel2<1>;

            alt {
                ((0b1 || R2 || 0b0) @ ru1) == 0x0: dclasel1 = 0b0010;
                ((0b1 || R2 ) @ ru2<3:1> ) == 0b000: dclasel1 = 0b0100;
                ((0b1 || R2 ) @ ru3<3:1> ) == 0b000: dclasel1 = 0b1000;
                else: dclasel1 = 0b0001;
            }
            alt {
                ((0b1 || R3 || 0b0) @ ru1) == 0x0: dclasel2 = 0b0010;
                ((0b1 || R3 ) @ ru2<3:1> ) == 0b000: dclasel2 = 0b0100;
                ((0b1 || R3 ) @ ru3<3:1> ) == 0b000: dclasel2 = 0b1000;
                else: dclasel2 = 0b0001;
            }

            any {
                /* During the stall, there is no destination of course. */
                stall_req: par {
                    ru1 := 0b0000;
                    streq := 0b0;
                }
                /* Registering of the destination register
                   to the usage map. */
                stall_req == 0b0: par {
                    streq := (OPCODE == ST);
                    any {
                        ^ITYPE & id.idt:
                            ru1 := 0b1 || R1 || 0b0;
                        (OPCODE == LD ) | (OPCODE == LDA) | (OPCODE == IN ) | (OPCODE == BAL) :
                            ru1 := 0b1 || R2 || ^(OPCODE == LDA ) ;
                        else:
                            ru1 := 0b0000;
                    }
                }
            }
        }
    }

    stage iff {
        state_name fetch,int1,int2;
        first_state fetch ;

        state fetch any {
            inten & (int.intt | cntintflag) : goto int1;
            else: any {
                /* stall_req will be issued in the decode stage.
                   Because there is no way to cancel the relayed nor
                   generated stages, stall_req is not a latched signal.
                   Be careful for the signal delay. */
                stall_req == 0b0: par {
                    any {
                        ntif : ntif := 0b0;
                    } /* any */
                    any {
                        /* BTB hit: continue fetching from the predicted
                           target and mark the instruction as predicted. */
                        btbv & ((npc @ btb<15:8>) == 0x00): par {
                            relay id.idt(npc, inst_read().inst, 0b1 || ntif) ;
                            generate iff.ift(btb<7:0>);
                        }
                        else: par {
                            relay id.idt(npc, inst_read().inst, 0b0 || ntif) ;
                            generate iff.ift(npc);
                        }
                    } /* any */
                } /* par */
            } /* any */
        } /* state fetch */

        state int1 par {
            inten := 0b0;
            /* we will wait for stalled instruction in D stage. */
            if(^id.idt) goto int2;
        }

        state int2 par {
            /* At this state the last instruction in pipe will sit on the
               mem stage, and the npc will show the actual address for
               returning interrupt. */
            targetif(ir2);
            ir3 := npc;
            goto fetch;
        }
    }

    stage id {
        par {
            dtop1 = gr.read(R2).regout;
            any {
                /* I-type with r3 field 0b00 uses a zero base operand. */
                ITYPE & (R3 == 0b00): dtop2 = 0x00;
                else : dtop2 = gr.readb(R3).regoutb;
            }
            any {
                OPCODE == SLT : dalusel = ALUSLT;
                OPCODE == AND : dalusel = ALUAND;
                OPCODE == NOT : dalusel = ALUNOT;
                OPCODE == SR : dalusel = ALUSR;
                else : dalusel = ALUADD;
            }
            any {
                ^stall_req:
                    relay exec.ext(dpc, op, dtop1, dtop2 ,dtif,
                                   dclasel1, dclasel2, dalusel) ;
            } /* any */
        } /* par */
    } /* stage */

    stage exec {
        par {
            br_taken = (eOPCODE == BAL) | ((eOPCODE == BZ) & (er2f == 0x00));

            /* alu input1 forwarding control */
            any {
                eITYPE: clain1 = eI;
                else: clain1 = er2f;
            }
            /* alu input2 forwarding control */
            any {
                clasel2<0>: clain2 = eop2;
                clasel2<1>: clain2 = malu;
                clasel2<2>: clain2 = walu;
                clasel2<3>: clain2 = xalu;
            }
            /* alu er2 forwarding control */
            any {
                clasel1<0>: er2f = eop1;
                clasel1<1>: er2f = malu;
                clasel1<2>: er2f = walu;
                clasel1<3>: er2f = xalu;
            }

            any {
                alusel<0>: aluo = cla.do(0b0, clain1, clain2).out;
                /* SLT: sign bit of clain1 + ^clain2 + 1 */
                alusel<1>: aluo = 0b0000000 || cla.do(0b1, clain1, ^clain2).out<7>;
                alusel<2> : aluo = clain1 & clain2;
                alusel<3> : aluo = ^clain1;
                alusel<4> : aluo = 0b0 || clain1<7:1> ;
            }

            any {
                /* Execute unless a squash is pending; the first instruction
                   fetched from a new target (etif<0>) always executes. */
                (eforcenop == 0b0) | (etif<0> == 0b1): par {
                    any {
                        (eOPCODE == LD ) | (eOPCODE == ST ) : mar := cla.out;
                    }
                    any {
                        (eOPCODE == RI): par {
                            targetif(ir3);
                            finish;
                        }
                        br_taken: par {
                            btbv := 0b1;
                            btb := epc || cla.out;
                            any {
                                ^etif<1> | ^((btb<15:8> @ epc) == 0x00) | ^((btb<7:0> @ cla.out) == 0x00):
                                    /* prediction miss or not predicted case */
                                    par {
                                        /* ntif and tpc will be feed to the if stage */
                                        targetif(cla.out);
                                    }
                                else: eforcenop := 0b0;
                            }
                            any {
                                (eOPCODE == BZ): finish;
                                else: relay mms.mmt(epc, eop, aluo, er2f);
                            }
                        } /* par */
                        else: par {
                            any {
                                etif<1> & ((btb<15:8> @ epc) == 0x00):
                                    /* prediction miss not taken case */
                                    par {
                                        /* ntif and tpc will be feed to the if stage */
                                        targetif(epc);
                                        /* on the case of miss BTB will be discarded */
                                        btbv := 0b0;
                                    }
                                else: eforcenop := 0b0;
                            }
                            any {
                                (eOPCODE == BZ): finish;
                                else: relay mms.mmt(epc, eop, aluo, er2f);
                            }
                        } /* par */
                    } /* any */
                } /* par */
                else: finish;
            } /* any */
        } /* par */
    }

    stage mms {
        par {
            any {
                mOPCODE == LD : relay wrb.wbt(mR2, memory_read().dti);
                mOPCODE == BAL : relay wrb.wbt(mR2, mpc);
                mOPCODE == LDA : relay wrb.wbt(mR2, malu);
                (mOPCODE == IN) : any {
                    malu == 0x00: relay wrb.wbt(mR2, int.intt || cntintflag || 0b0000 || cnten || inten );
                    malu == 0x01: relay wrb.wbt(mR2, ir1);
                    malu == 0x02: relay wrb.wbt(mR2, ir2);
                    malu == 0x03: relay wrb.wbt(mR2, ir3);
                }
                mOPCODE == ST : par {
                    memory_write(msrc) ;
                    finish;
                }
                (mOPCODE == OUT) : par {
                    /* malu is the IO address (ALU result); the data to be
                       written is msrc, the forwarded r1 operand handed over
                       by the exec stage, exactly as for ST. Taking the data
                       from malu or walu would store the address itself or
                       the result of an unrelated older instruction. */
                    any {
                        malu == 0x00: par {
                            cntintflag := msrc<6>;
                            cnten := msrc<1>;
                            inten := msrc<0>;
                        }
                        malu == 0x01: ir1 := msrc;
                        malu == 0x02: ir2 := msrc;
                        malu == 0x03: ir3 := msrc;
                    }
                    finish;
                }
                /* Remaining (R-type) results go to the r1 field, mop<7:6>.
                   This is the same field the pipectl stage records as the
                   destination in the register usage map. */
                else : relay wrb.wbt(mR1, malu ) ;
            }
        }
    }

    stage wrb {
        par {
            /* xalu keeps the written value one more cycle for forwarding. */
            xalu := walu;
            gr.write(wop,walu);
            finish;
        }
    }
}

/* Four-entry, 8-bit register file with two read ports and one write port. */
module reg4 {
    input regin<8> ;
    input reginadr<2> ;
    input regoutadr<2> ;
    input regoutbadr<2> ;
    output regout<8> ;
    output regoutb<8> ;
    instrin read;
    instrin readb;
    instrin write;

    reg r0<8>,r1<8>,r2<8>,r3<8> ;

    /* read port A */
    instruct read any {
        regoutadr == 0b00: regout = r0;
        regoutadr == 0b01: regout = r1;
        regoutadr == 0b10: regout = r2;
        regoutadr == 0b11: regout = r3;
    }

    /* read port B */
    instruct readb any {
        regoutbadr == 0b00: regoutb = r0;
        regoutbadr == 0b01: regoutb = r1;
        regoutbadr == 0b10: regoutb = r2;
        regoutbadr == 0b11: regoutb = r3;
    }

    /* write port */
    instruct write any {
        reginadr == 0b00: r0 := regin;
        reginadr == 0b01: r1 := regin;
        reginadr == 0b10: r2 := regin;
        reginadr == 0b11: r3 := regin;
    }
}

/* 8-bit incrementer: out = in + 1. */
module inc8 {
    input in<8> ;
    output out<8> ;
    instrin do ;
    instruct do out=in+0x01;
}

/* 8-bit decrementer: out = in - 1. */
module dec8 {
    input in<8> ;
    output out<8> ;
    instrin do ;
    instruct do out=in-0x01;
}

/* 8-bit adder with carry-in: out = in1 + in2 + cin. */
module cla8 {
    input cin, in1<8>, in2<8> ;
    output out<8> ;
    instrin do ;
    instruct do out=in1+in2+(0b0000000 || cin);
}