NAME ^

docs/dev/jit_i386.dev - Parrot JIT (i386/gcc)

ABSTRACT ^

This PDD describes the i386 gcc JIT implementation.

DESCRIPTION ^

JIT i386/gcc is a combination of unrolled assembly instructions and the Computed Goto Predereferenced (CGP) run loop. For branch instructions the function implementation in the standard core is called.

Another difference of JIT/i386 is that most vtable functions are JITed instructions which use register mappings.

For a better understanding of the control flow between these three run loop cores (JIT, the standard core and CGP), an example shows the gory details.

EXAMPLE ^

Given the following PASM program, the three right-hand columns show where each opcode gets executed:

        PASM                 JIT ops   Normal     CGP ops

                             (call cgp_core)      (jmp back)

        set I0, 10           set_i_ic
        print I0             (call)               print_i
        print "\n"                                print_sc
        bsr inc              (call)     bsr_ic    cpu_ret
        end                  (jmp) HALT           end (ret)
                             end (ret)
  inc:
        inc I0               inc_i
        new P0, .PerlString  new_p_ic
        set P0, I0           set_p_i
        print P0             (call)               print_p
        print "\n"                                print_sc
        ret                  (call)     ret       cpu_ret

Startup sequence ^

In runops_jit a prederefed copy of the opcode stream is built by init_prederef. Then build_asm generates the assembler code sequence as usual. This generated code (shown as runops_jit in ddd) is then executed.

Generate minimal stack frame, save %ebx

    0x812c510 <jit_func>:       push   %ebp
    0x812c511 <jit_func+1>:     mov    %esp,%ebp
    0x812c513 <jit_func+3>:     push   %ebx

Get the program counter to %ebx

    0x812c514 <jit_func+4>:     mov    0xc(%ebp),%ebx

Push interpreter and (opcode_t*) 1 and call cgp_core

    0x812c517 <jit_func+7>:     push   $0x8113db8
    0x812c51c <jit_func+12>:    push   $0x1
    0x812c521 <jit_func+17>:    mov    $0x1,%eax
    0x812c526 <jit_func+22>:    call   0x80b5830 <cgp_core>

In cgp_core all callee saved registers are saved.

    0x80b5830 <cgp_core>:       push   %ebp
    0x80b5831 <cgp_core+1>:     mov    %esp,%ebp
    0x80b5833 <cgp_core+3>:     sub    $0xdc,%esp
    0x80b5839 <cgp_core+9>:     lea    0x8(%ebp),%eax
    0x80b583c <cgp_core+12>:    push   %edi
    0x80b583d <cgp_core+13>:    push   %esi
    0x80b583e <cgp_core+14>:    push   %ebx

In %eax the init flag is set to -1

    0x80b583f <cgp_core+15>:    mov    %eax,0xfffffff

The parameter *cur_op (the program counter) is put into %esi and ...

    0x80b5842 <cgp_core+18>:    mov    0x8(%ebp),%esi
    0x80b5845 <cgp_core+21>:    test   %esi,%esi
    0x80b5847 <cgp_core+23>:    jne    0x80b5853 <cgp_core+35>
    0x80b5849 <cgp_core+25>:    mov    $0x810ca60,%eax
    0x80b584e <cgp_core+30>:    jmp    0x80bb470 <cgp_core+23616>

... compared to 1

    0x80b5853 <cgp_core+35>:    cmp    $0x1,%esi
    0x80b5856 <cgp_core+38>:    jne    0x80b5860 <cgp_core+48>

If true, the program jumps to the return address of the above function call, i.e. it jumps back again to JIT code.

    0x80b5858 <cgp_core+40>:    jmp    *0x4(%ebp)

Back again in JIT code, the init flag is checked

    0x812c52b <jit_func+27>:    test   %eax,%eax
    0x812c52d <jit_func+29>:    jne    0x812c536 <jit_func+38>

... and if zero, the function would be left.

 [   0x812c52f <jit_func+31>:   pop    %ebx          ]
 [   0x812c531 <jit_func+33>:   mov    %ebp,%esp     ]
 [   0x812c533 <jit_func+35>:   pop    %ebp          ]
 [   0x812c535 <jit_func+37>:   ret                  ]

When coming from the init sequence, program flow continues by checking the resume_offset and jumping to the desired instruction

    0x812c536 <jit_func+38>:    mov    %ebx,%eax
    0x812c538 <jit_func+40>:    sub    $0x400140c0,%eax
    0x812c53e <jit_func+46>:    mov    $0x812c4a8,%edx
    0x812c543 <jit_func+51>:    jmp    *(%edx,%eax,1)

set I0, 10 and save_registers

    0x812c546 <jit_func+54>:    mov    $0xa,%ebx
    0x812c54b <jit_func+59>:    mov    %ebx,0x8113db8

Now non-JITed code follows -- get the address from the prederefed op_func_table and call it:

    0x812c551 <jit_func+65>:    mov    $0x812ac0c,%esi
    0x812c556 <jit_func+70>:    call   *(%esi)

    inline op print(in INT) {
      printf(INTVAL_FMT, (INTVAL)$1);
      goto NEXT();
    }

where the goto NEXT() is a simple:

    0x80b5b49 <cgp_core+793>:   jmp    *(%esi)

    op print(in STR) {
     ...
      goto NEXT();
    }

As the last instruction of the non-JITed code sequence is a branch, it is not executed in CGP; instead the opcode:

    inline op cpu_ret() {
    #ifdef __GNUC__
    # ifdef I386
       asm("ret")

is executed. This opcode is patched into the prederefed code stream by Parrot_jit_normal_op at the end of a non-JITed code sequence. This returns to JIT code again, where the next instruction gets called as a function in the standard core ...

    0x812c558 <jit_func+72>:    push   $0x8113db8
    0x812c55d <jit_func+77>:    push   $0x400140dc
    0x812c562 <jit_func+82>:    call   0x805be60 <Parrot_bsr_ic>
    0x812c567 <jit_func+87>:    add    $0x8,%esp

... and from the return result in %eax, the new code position in JIT is calculated and gets jumped to:

    0x812c56a <jit_func+90>:    sub    $0x400140c0,%eax
    0x812c570 <jit_func+96>:    mov    $0x812c4a8,%edx
    0x812c575 <jit_func+101>:   jmp    *(%edx,%eax,1)

Now in the subroutine inc:

    0x812c580 <jit_func+112>:   mov    0x8113db8,%ebx
    0x812c586 <jit_func+118>:   inc    %ebx

Save register and arguments and call pmc_new_noinit:

    0x812c587 <jit_func+119>:   push   %edx
    0x812c588 <jit_func+120>:   push   $0x11
    0x812c58d <jit_func+125>:   push   $0x8113db8
    0x812c592 <jit_func+130>:   call   0x806fc60 <pmc_new_noinit>

put the PMC* into Parrot's register:

    0x812c597 <jit_func+135>:   mov    %eax,0x8113fb8

and prepare arguments for a VTABLE call:

    0x812c59d <jit_func+141>:   push   %eax
    0x812c59e <jit_func+142>:   push   $0x8113db8
    0x812c5a3 <jit_func+147>:   mov    0x10(%eax),%eax
    0x812c5a6 <jit_func+150>:   call   *0x18(%eax)
    0x812c5a9 <jit_func+153>:   add    $0x10,%esp
    0x812c5ac <jit_func+156>:   pop    %edx

and another one:

    0x812c5ae <jit_func+158>:   push   %edx

Here, with the mapped register in %ebx, push I0, the PMC and the interpreter:

    0x812c5af <jit_func+159>:   push   %ebx
    0x812c5b0 <jit_func+160>:   mov    0x8113fb8,%eax
    0x812c5b6 <jit_func+166>:   push   %eax
    0x812c5b7 <jit_func+167>:   push   $0x8113db8

and call the vtable:

    0x812c5bc <jit_func+172>:   mov    0x10(%eax),%eax
    0x812c5bf <jit_func+175>:   call   *0xdc(%eax)
    0x812c5c5 <jit_func+181>:   add    $0xc,%esp
    0x812c5c8 <jit_func+184>:   pop    %edx

As this ends the JITed section, used registers are saved back to Parrot's register:

    0x812c5ca <jit_func+186>:   mov    %ebx,0x8113db8

and again the code in cgp_core gets called:

    0x812c5d0 <jit_func+192>:   mov    $0x812ac48,%esi
    0x812c5d5 <jit_func+197>:   call   *(%esi)

which after executing the print returns back here in JIT, where the ret is called:

    0x812c5d7 <jit_func+199>:   push   $0x8113db8
    0x812c5dc <jit_func+204>:   push   $0x40014118
    0x812c5e1 <jit_func+209>:   call   0x805d5e0 <Parrot_ret>
    0x812c5e6 <jit_func+214>:   add    $0x8,%esp

From the returned PC a JIT address is calculated, which gets executed:

    0x812c5e9 <jit_func+217>:   sub    $0x400140c0,%eax
    0x812c5ef <jit_func+223>:   mov    $0x812c4a8,%edx
    0x812c5f4 <jit_func+228>:   jmp    *(%edx,%eax,1)

Now at the end opcode, the CGP code for HALT() gets jumped to:

    0x812c578 <jit_func+104>:   mov    $0x80b5877,%esi
    0x812c57d <jit_func+109>:   jmp    *%esi

which is:

    inline op end() {
      HALT();
    }

or, set return result:

    0x80b8b6f <cgp_core+13119>: xor    %eax,%eax
    ...

and clean up stack frame and ret:

    0x80bb470 <cgp_core+23616>: lea    0xffffff18(%ebp),%esp
    0x80bb476 <cgp_core+23622>: pop    %ebx
    0x80bb477 <cgp_core+23623>: pop    %esi
    0x80bb478 <cgp_core+23624>: pop    %edi
    0x80bb479 <cgp_core+23625>: mov    %ebp,%esp
    0x80bb47b <cgp_core+23627>: pop    %ebp
    0x80bb47c <cgp_core+23628>: ret

This returns after the position where cgp_core was called during the init sequence, but now the return value %eax is zero and the ...

    0x812c52b <jit_func+27>:    test   %eax,%eax
    0x812c52d <jit_func+29>:    jne    0x812c536 <jit_func+38>
    0x812c52f <jit_func+31>:    pop    %ebx
    0x812c531 <jit_func+33>:    mov    %ebp,%esp
    0x812c533 <jit_func+35>:    pop    %ebp
    0x812c535 <jit_func+37>:    ret

... whole story ends here, we are back again in runops_jit.

So this is rather simple once it gets going.

BUGS ^

The floating point registers do not get saved to Parrot before vtable calls. This assumes that external routines preserve the FP stack pointer and don't use more than 4 floating point registers at once.

AUTHOR ^

Leopold Toetsch <lt@toetsch.at>

VERSION ^

CURRENT ^

14.02.2003 by Leopold Toetsch


parrot