02-02-2017, 07:41 PM
Code:
use std::fmt;
use std::string::String;
use std::vec::Vec;
#[derive(PartialEq, Clone)]
enum TokenType {
Terminal, Nonterminal,
To,
Choice,
Optional, Optionals, More,
Lparen, Rparen,
Terminator,
Eof
}
impl fmt::Display for TokenType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", match *self {
TokenType::Terminal => "Terminal",
TokenType::Nonterminal => "Nonterminal",
TokenType::To => "To",
TokenType::Choice => "Choice",
TokenType::Optional => "Optional",
TokenType::Optionals => "Optionals",
TokenType::More => "More",
TokenType::Lparen => "Lparen",
TokenType::Rparen => "Rparen",
TokenType::Terminator => "Terminator",
TokenType::Eof => "Eof"
})
}
}
#[derive(Clone)]
struct Token {
token_type: TokenType,
value: String //,
// row: u32,
// column: u32
}
#[derive(PartialEq, Clone)]
enum NodeType {
Grammar,
Production,
Choice,
Expression,
Unit,
Terminal
}
/*
impl fmt::Display for NodeType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", match *self {
NodeType::Grammar => "grammar",
NodeType::Production => "production",
NodeType::Expression => "expression",
NodeType::Unit => "unit"
})
}
}
*/
#[derive(Clone)]
struct Node {
node_type: NodeType,
value: String,
children: Vec<Node>
}
fn node_print(node: &Node) -> String {
let mut result = format!("[{}", node.value);
for child in &node.children {
result = format!("{} {}", result, node_print(&child));
}
result = format!("{}]", result);
result
}
/*
impl Node {
fn new(new_type: NodeType) -> Node {
Node {
node_type: new_type,
value:
children: Vec::new()
}
}
}
*/
fn advance(source: &String, character: &mut char, position: &mut usize, length: &usize) {
*position += 1;
if *position >= *length {
*character = 0 as char;
} else {
*character = source.chars().nth(*position).unwrap();
}
}
fn back(source: &String, character: &mut char, position: &mut usize, length: &usize) {
if *position == 0 {
*character = 0 as char;
} else {
*character = source.chars().nth(*position).unwrap();
}
}
fn get_terminal(source: &String, character: &mut char, position: &mut usize, length: &usize) -> String {
let mut result = String::new();
while character.is_uppercase() {
result.push(*character);
advance(source, character, position, length);
if character.is_lowercase() {
panic!("Lowercase in uppercase");
}
}
result
}
fn get_nonterminal(source: &String, character: &mut char, position: &mut usize, length: &usize) -> String {
let mut result = String::new();
while character.is_lowercase() {
result.push(*character);
advance(source, character, position, length);
if character.is_uppercase() {
panic!("Uppercase in lowercase");
}
}
result
}
fn get_next_token(source: &String, character: &mut char, position: &mut usize, length: &usize) -> Token {
while *character == ' ' || *character == '\n' || *character == '\r' {
advance(source, character, position, length);
}
match *character {
ch if ch.is_lowercase() => {
let token = Token {
token_type: TokenType::Nonterminal,
value: get_nonterminal(source, character, position, length)
};
token
},
ch if ch.is_uppercase() => {
let token = Token {
token_type: TokenType::Terminal,
value: get_terminal(source, character, position, length)
};
token
},
'-' => {
advance(source, character, position, length);
if *character == '>' {
advance(source, character, position, length);
Token {
token_type: TokenType::To,
value: String::from("->")
}
} else {
panic!("temp error");
}
},
'|' => {
advance(source, character, position, length);
Token {
token_type: TokenType::Choice,
value: String::from("|")
}
},
'(' => {
advance(source, character, position, length);
Token {
token_type: TokenType::Lparen,
value: String::from("(")
}
},
')' => {
advance(source, character, position, length);
Token {
token_type: TokenType::Rparen,
value: String::from(")")
}
},
'?' => {
advance(source, character, position, length);
Token {
token_type: TokenType::Optional,
value: String::from("?")
}
},
'+' => {
advance(source, character, position, length);
Token {
token_type: TokenType::More,
value: String::from("+")
}
},
'*' => {
advance(source, character, position, length);
Token {
token_type: TokenType::Optionals,
value: String::from("*")
}
},
';' => {
advance(source, character, position, length);
Token {
token_type: TokenType::Terminator,
value: String::from(";")
}
},
_ => Token {
token_type: TokenType::Eof,
value: String::from("<EOF>")
}
}
}
fn get_tokens(source: &String) -> Vec<Token> {
let mut character = source.chars().nth(0).unwrap();
let mut position: usize = 0;
let length = source.len();
let mut tokens = Vec::new();
loop {
let token = get_next_token(source, &mut character, &mut position, &length);
if token.token_type == TokenType::Eof {
tokens.push(token);
break;
} else {
tokens.push(token);
}
}
tokens
}
fn advance_token(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize) {
*position += 1;
if *position >= tokens.len() {
panic!("Requested out of bounds token");
} else {
*current_token = tokens.get(*position).unwrap().clone();
}
}
fn expect(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize, token_type: TokenType) {
if current_token.token_type == token_type {
advance_token(tokens, current_token, position);
} else {
panic!("Expect error: {}", token_type);
}
}
fn parse_unit(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize) -> Node {
let mut node = match current_token.token_type {
TokenType::Terminal => {
let token = current_token.clone();
advance_token(tokens, current_token, position);
Node {
node_type: NodeType::Unit,
value: String::from("unit"),
children: vec!(Node {
node_type: NodeType::Terminal,
value: token.value,
children: Vec::new()
})
}
},
TokenType::Nonterminal => {
let token = current_token.clone();
advance_token(tokens, current_token, position);
Node {
node_type: NodeType::Unit,
value: String::from("unit"),
children: vec!(Node {
node_type: NodeType::Terminal,
value: token.value,
children: Vec::new()
})
}
},
TokenType::Lparen => {
advance_token(tokens, current_token, position);
let choice = parse_choice(tokens, current_token, position);
expect(tokens, current_token, position, TokenType::Rparen);
Node {
node_type: NodeType::Unit,
value: String::from("unit"),
children: vec!(Node {
node_type: NodeType::Terminal,
value: String::from("("),
children: Vec::new()
}, choice, Node {
node_type: NodeType::Terminal,
value: String::from(")"),
children: Vec::new()
})
}
},
_ => panic!("Unit error -- default case: {}", current_token.token_type)
};
while current_token.token_type == TokenType::Optional || current_token.token_type == TokenType::Optionals || current_token.token_type == TokenType::More {
match current_token.token_type {
TokenType::Optional => {
node = Node {
node_type: NodeType::Unit,
value: String::from("unit"),
children: vec!(node, Node {
node_type: NodeType::Terminal,
value: String::from("?"),
children: Vec::new()
})
};
},
TokenType::Optionals => {
node = Node {
node_type: NodeType::Unit,
value: String::from("unit"),
children: vec!(node, Node {
node_type: NodeType::Terminal,
value: String::from("*"),
children: Vec::new()
})
};
},
TokenType::More => {
node = Node {
node_type: NodeType::Unit,
value: String::from("unit"),
children: vec!(node, Node {
node_type: NodeType::Terminal,
value: String::from("+"),
children: Vec::new()
})
};
},
_ => panic!("Invalid state")
}
advance_token(tokens, current_token, position);
}
node
}
fn parse_expression(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize) -> Node {
let mut node = Node {
node_type: NodeType::Expression,
value: String::from("expression"),
children: vec!(parse_unit(tokens, current_token, position))
};
while current_token.token_type != TokenType::Choice && current_token.token_type != TokenType::Rparen && current_token.token_type != TokenType::Terminator && current_token.token_type != TokenType::Eof {
node = Node {
node_type: NodeType::Expression,
value: String::from("expression"),
children: vec!(node, parse_unit(tokens, current_token, position))
};
}
node
}
fn parse_choice(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize) -> Node {
let mut node = Node {
node_type: NodeType::Choice,
value: String::from("choice"),
children: vec!(parse_expression(tokens, current_token, position))
};
while current_token.token_type == TokenType::Choice {
advance_token(tokens, current_token, position);
node = Node {
node_type: NodeType::Choice,
value: String::from("choice"),
children: vec!(node, Node {
node_type: NodeType::Terminal,
value: String::from("|"),
children: Vec::new()
}, parse_expression(tokens, current_token, position))
};
}
node
}
fn parse_production(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize) -> Node {
let terminal = current_token.clone();
expect(tokens, current_token, position, TokenType::Nonterminal);
let to = current_token.clone();
expect(tokens, current_token, position, TokenType::To);
let choice = parse_choice(tokens, current_token, position);
expect(tokens, current_token, position, TokenType::Terminator);
Node {
node_type: NodeType::Production,
value: String::from("production"),
children: vec!(Node {
node_type: NodeType::Terminal,
value: terminal.value,
children: Vec::new()
}, Node {
node_type: NodeType::Terminal,
value: to.value,
children: Vec::new()
}, choice)
}
}
fn parse_grammar(tokens: &Vec<Token>, current_token: &mut Token, position: &mut usize) -> Node {
let mut node = Node {
node_type: NodeType::Grammar,
value: String::from("grammar"),
children: Vec::new()
};
while current_token.token_type != TokenType::Eof {
node.children.push(parse_production(tokens, current_token, position));
}
node
}
pub fn parse(grammar: &String) -> String {
let tokens = get_tokens(grammar);
let mut result = String::from("Tokens:\n");
for token in &tokens {
result = format!("{}{} : {}\n", result, token.token_type, token.value);
}
let mut current_token = tokens.get(0).unwrap().clone();
let mut position: usize = 0;
result = format!("{}Parse tree:\n", result);
let grammar_tree = parse_grammar(&tokens, &mut current_token, &mut position);
result = format!("{}{}", result, node_print(&grammar_tree));
result
}