NAME ^

pirparser.c - parser for Parrot Intermediate Representation

THOUGHTS FOR LATER ^

PARSER INTERNALS ^

The parser_state structure has the following fields:

 typedef struct parser_state {
   struct     lexer_state *lexer; -- the lexer
   token      curtoken;           -- the current token as returned by the lexer
   char     **heredoc_ids;        -- array for holding heredoc arguments
   unsigned   heredoc_index;      -- index to keep track of heredoc ids in the array
   unsigned   parse_errors;       -- counter for parse_errors
   pirvtable *vtable;             -- vtable holding pointers for output routines
 }

PARSER API ^

void exit_parser(parser_state *p)

Clean up and exit the program normally.

int get_parse_errors(parser_state *p)

return the number of parse errors.

parser_state *new_parser(char const *filename, pirvtable *vtable)

Constructor for a parser_state object. The file with the specified name is parsed. The semantic actions in vtable are called at certain points in the code. The vtable is constructed in the parser's client code.

struct lexer_state const *get_lexer(parser_state *p)

returns the specified parser's lexer

token get_token(parser_state *p)

returns the specified parser's current token

HELPER FUNCTIONS ^

static void resize_heredoc_args(parser_state *p)

Reallocate memory for the array holding heredoc arguments. If needed, the array is resized to twice its previous size. So, initially it's MAX_HEREDOC_ARGS, after the first resize(), it's 2 times MAX_HEREDOC_ARGS, after the second time it's 2 * 2 * MAX_HEREDOC_ARGS, etc.

static void syntax_error(parser_state *p, int numargs, ...)

Handle all syntax errors through this function. numargs is the number of variable arguments. All arguments should be of type "char *".

static void match(parser_state *p, token expected)

Checks whether the current token is the same as the expected token. If so, all is ok, and the next token is fetched. If not, an appropriate syntax error is reported.

GRAMMAR ^

Conventions ^

The following conventions are used:

 [ foo ]       indicates an optional foo element
 { foo }       indicates zero or more foo elements
 ( foo | bar ) either foo or bar
 IDENTIFIER    match a token of type IDENTIFIER
 'string'      match the literal 'string'

Grammar rules ^

Expressions

static token expression(parser_state *p)

  expression -> ( IDENTIFIER | INTC | NUMC | STRINGC | register )
static void string_value(parser_state *p)

  string_value -> SREG | PASM_SREG | STRINGC
static void method(parser_state *p)

  method -> IDENTIFIER | STRINGC
static void target(parser_state *p)

  target   -> register | IDENTIFIER
static void type(parser_state *p)

  type -> 'int' | 'num' | 'pmc' | 'string'
static void key(parser_state *p)

  key -> '-' expression | '..' expression | expression [ '..' [ expression ] ]
static void keylist(parser_state *p)

  keylist -> '[' key { (';'|',') key } ']'
static void arg_flags(parser_state *p)

  arg_flags -> { arg_flag }

  arg_flag -> ':flat' | ':named' [ '(' STRINGC ')' ]
static void argument(parser_state *p)

  argument -> HEREDOCID | expression arg_flags | STRINGC ('=>' expression | arg_flags)
static void argument_list(parser_state *p)

  argument_list -> argument { ',' argument }
static void arguments(parser_state *p)

  arguments -> '(' [argument_list] ')' heredoc_arguments

  heredoc_arguments -> { HEREDOC_STRING }

Statements

static void methodcall(parser_state *p)

  methodcall -> INVOCANT_IDENT method arguments
static void arith_expression(parser_state *p)

  arith_expr -> [ binop expression ]

  binop      -> '+'  | '-' | '*'  | '/' | '//' | '%'  | '~~'  | '~'
              | '&&' | '&' | '||' | '|' | '<<' | '>>' | '>>>' | '.'
static void parrot_instruction(parser_state *p)

  parrot_instruction -> PARROT_OP [ expression {',' expression } ]
static void assignment(parser_state *p)

  assignment -> '=' ( unop expression
                    | expression arith_expr
                    | target ( keylist | arguments )
                    | STRINGC arguments
                    | 'global' STRINGC
                    | heredocstring
                    | methodcall
                    | 'null'
                    | parrot_instruction
                    )

  unop       -> '-' | '!' | '~'
static void return_statement(parser_state *p)

  return_statement -> '.return' ( arguments
                                | target arguments
                                | methodcall
                                )
                                '\n'
static void yield_statement(parser_state *p)

  yield_statement -> '.yield' arguments '\n'
static void close_ns(parser_state *p)

  close_ns -> '.endnamespace' IDENTIFIER '\n'
static void open_ns(parser_state *p)

  open_ns -> '.namespace' IDENTIFIER '\n'
static void local_id_list(parser_state *p)

  local_id_list -> local_id { ',' local_id }

  local_id  -> IDENTIFIER [':unique_reg']
static void declaration_list(parser_state *p)

  declaration_list -> type local_id_list '\n'
static void local_declaration(parser_state *p)

  local_declaration -> '.local' declaration_list
static void lex_declaration(parser_state *p)

  lex_declaration -> '.lex' STRINGC ',' target '\n'
static void conditional_expression(parser_state *p)

  conditional_expression -> expression [cond_op expression]

  cond_op -> '>' | '>=' | '<' | '<=' | '==' | '!='
static void jump_statement(parser_state *p)

  jump_statement -> 'goto' IDENTIFIER '\n'
static void goto_statement(parser_state *p)

  goto_statement -> jump_statement
static void unless_statement(parser_state *p)

  unless_statement -> 'unless' (['null'] expression | conditional_expression) jump_statement
static void if_statement(parser_state *p)

  if_statement -> 'if' (['null'] expression | conditional_expression) jump_statement
static void const_definition(parser_state *p)

  const_definition -> 'int' IDENTIFIER '=' INTC
                    | 'num' IDENTIFIER '=' NUMC
                    | 'pmc' IDENTIFIER '=' STRINGC
                    | 'string' IDENTIFIER '=' STRINGC
static void param_flags(parser_state *p)

  param_flags -> { param_flag }

  param_flag -> ':slurpy'
              | ':named'['(' STRINGC ')']
              | ':unique_reg'
              | ':optional'
              | ':opt_flag'
static void invokable(parser_state *p)

  invokable -> IDENTIFIER | PREG
static void long_invocation(parser_state *p)

  long-invocation -> '.pcc_begin' '\n'
                     { '.arg' expression arg_flags }
                     ( ('.pcc_call'|'.nci_call') invokable '\n'
                     | '.invocant' invokable '\n'
                       '.meth_call' method '\n'
                     )
                     { (local_declaration | '.result' target param_flags '\n') }
                     '.pcc_end' '\n'
static void long_return_statement(parser_state *p)

  long_return_statement -> '.pcc_begin_return' '\n'
                           { '.return' expression arg_flags '\n' }
                           '.pcc_end_return' '\n'
static void long_yield_statement(parser_state *p)

  long_yield_statement -> '.pcc_begin_yield' '\n'
                          { '.return' expression arg_flags '\n' }
                          '.pcc_end_yield' '\n'
static void target_statement(parser_state *p)

  target_statement -> target ( '=' assignment
                             | augmented_op expression
                             | keylist '=' expression
                             | arguments
                             )
                             '\n'

  augmented_op     -> '+=' | '-=' | '%=' | '/='  | '//=' | '*='  | '.='
                    | '~=' | '&=' | '|=' | '**=' | '<<=' | '>>=' | '>>>='
static void target_list(parser_state *p)

  target_list -> '(' target param_flags {',' target param_flags } ')'
static void multi_result_invocation(parser_state *p)

  multi-result-invocation -> target_list '=' (invokable arguments | methodcall) '\n'

  invokable -> IDENTIFIER | PREG | STRINGC
static void macro_expansion(parser_state *p)

  macro_expansion -> MACRO_IDENT [ '(' [ expression { ',' expression } ] ')' ] '\n'
static void get_results_instruction(parser_state *p)

  get_results_instr -> '.get_results' target_list '\n'
static void global_assignment(parser_state *p)

  global_assignment -> 'global' string_value '=' (IDENTIFIER|PREG) '\n'
static void instructions(parser_state *p)

  instructions -> {instruction}

  instruction  -> {LABEL ['\n']} instr

  instr -> if_statement
         | unless_statement
         | local_declaration
         | lex_declaration
         | '.globalconst' const_definition
         | '.const' const_definition
         | open_ns
         | close_ns
         | return_statement
         | yield_statement
         | macro_expansion
         | target_statement
         | STRINGC arguments
         | methodcall
         | long_invocation
         | long_return_statement
         | long_yield_statement
         | 'null' var
         | get_results_instruction
         | global_assignment
         | '\n'

Compilation Units

static void multi_type_list(parser_state *p)

  multi-type-list -> '(' [multi-type {',' multi-type } ] ')'

  multi-type -> IDENTIFIER | STRINGC | keylist | type
static void sub_flags(parser_state *p)

  sub_flags -> [sub_flag { sub_flag } ]

  sub_flag  -> ':anon'
             | ':init'
             | ':load'
             | ':main'
             | ':method'
             | ':lex'
             | ':outer' '(' ( STRINGC | IDENTIFIER )  ')'
             | ':vtable' '(' STRINGC ')'
             | ':multi' multi-type-list
             | ':postcomp'
             | ':immediate'
static void parameters(parser_state *p)

  parameters -> { '.param' parameter[param_flag] '\n' }

  parameter -> type [ STRINGC '=>' ] IDENTIFIER
static void sub_definition(parser_state *p)

  sub_definition -> '.sub' (IDENTIFIER | STRINGC) sub_flags '\n' parameters instructions '.end'
static void emit_block(parser_state *p)

  emit_block -> '.emit' '\n' { parrot_instruction '\n' } '.eom'
static void macro_parameters(parser_state *p)

  macro_parameters -> [ '(' [ id {',' id} ] ')' ]
static void macro_definition(parser_state *p)

 macro-definition -> '.macro' IDENTIFIER macro_parameters '\n' macro_body '.endm'
static void include(parser_state *p)

  include -> '.include' STRINGC
static void pragma(parser_state *p)

  pragma -> '.pragma' 'n_operators' INTC
static void hll_specifier(parser_state *p)

  hll_specifier -> '.HLL' STRINGC ',' STRINGC
static void hll_mapping(parser_state *p)

  hll_mapping -> '.HLL_map' STRINGC ',' STRINGC
static void namespace_declaration(parser_state *p)

  namespace_declaration -> '.namespace' [ '[' STRINGC { (','|';') STRINGC } ']' ]
static void loadlib(parser_state *p)

  loadlib -> '.loadlib' STRINGC
static void compilation_unit(parser_state *p)

  compilation_unit -> sub_definition
                    | '.const' const_definition
                    | emit_block
                    | include
                    | macro_definition
                    | pragma
                    | loadlib
                    | namespace_declaration
                    | hll_specifier
                    | hll_mapping
static void program(parser_state *p)

  program -> {'\n'} compilation_unit { '\n' compilation_unit } EOF
void TOP(parser_state *p)

  TOP -> program


parrot