gmflib: first stab at gma lex/parse

main
q3k 2022-04-18 14:01:13 +00:00
parent a2d47206ce
commit 462c78ac86
7 changed files with 657 additions and 13 deletions

View File

@ -8,3 +8,4 @@ edition = "2021"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
gmfmacros = { path = "../gmfmacros" }
byteorder = "1"

162
gmflib/src/gma/lex.rs Normal file
View File

@ -0,0 +1,162 @@
/// A single lexed token, tagged with its position in the input.
#[derive(Debug,PartialEq,Eq,Clone)]
pub struct Token<'i> {
    /// What was lexed (a separator, or a word borrowing from the input).
    pub kind: TokenKind<'i>,
    /// Byte offset of the token's first character in the underlying input.
    pub offs: usize,
}
/// The kind of a lexed token: either a single separator character (tab,
/// space, newline) or a maximal run of non-separator characters (a word).
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum TokenKind<'i> {
    Tab,
    Space,
    Newline,
    Word(&'i str),
}

impl<'i> TokenKind<'i> {
    /// Length of this token in bytes of the underlying input.
    pub fn len(&self) -> usize {
        match self {
            TokenKind::Word(w) => w.len(),
            // Every separator is a single ASCII character.
            TokenKind::Tab | TokenKind::Space | TokenKind::Newline => 1,
        }
    }

    /// Human-readable description of the token, for error messages.
    pub fn explain(&self) -> String {
        match self {
            TokenKind::Tab => String::from("tab"),
            TokenKind::Space => String::from("space"),
            TokenKind::Newline => String::from("newline"),
            TokenKind::Word(w) => format!("literal '{}'", w),
        }
    }
}
/// The result of lexing: a flat token stream plus the input it borrows from.
pub struct Tokens<'i> {
    /// All tokens, in input order.
    pub tokens: Vec<Token<'i>>,
    /// The original input string; word tokens and offsets point into it.
    pub underlying: &'i str,
}
impl<'i> Tokens<'i> {
pub fn parse(s: &'i str) -> Self {
let mut res: Vec<Token<'i>> = vec![];
let mut cur: Option<usize> = None;
for (i, ch) in s.char_indices() {
if let Some(c) = cur {
// Continue word.
let n = match ch {
'\t' => Some(Token {
kind: TokenKind::Tab,
offs: i,
}),
' ' => Some(Token {
kind: TokenKind::Space,
offs: i,
}),
'\n' => Some(Token {
kind: TokenKind::Newline,
offs: i,
}),
_ => None,
};
if let Some(t) = n {
res.push(Token {
kind: TokenKind::Word(&s[c..i]),
offs: c
});
res.push(t);
cur = None;
}
} else {
// Begin new token if necessary.
match ch {
'\t' => res.push(Token {
kind: TokenKind::Tab,
offs: i,
}),
' ' => res.push(Token {
kind: TokenKind::Space,
offs: i,
}),
'\n' => res.push(Token {
kind: TokenKind::Newline,
offs: i,
}),
_ => cur = Some(i),
}
}
}
match cur {
None => (),
Some(c) => res.push(Token {
kind: TokenKind::Word(&s[c..]),
offs: c,
}),
}
Self {
tokens: res,
underlying: s,
}
}
pub fn view<'p>(&'p self) -> TokensView<'i, 'p> {
TokensView {
parent: &self,
start: 0,
}
}
}
/// A cheap window into a `Tokens` stream. Parsers narrow the view (via
/// `sub`) as they consume tokens, without mutating the stream itself.
pub struct TokensView<'i, 'p> {
    /// The token stream being viewed.
    pub parent: &'p Tokens<'i>,
    /// Index (into `parent.tokens`) of the first token visible in this view.
    pub start: usize,
}
impl<'i, 'p> TokensView<'i, 'p> {
    /// Iterate over the tokens visible in this view.
    pub fn iter<'b>(&'b self) -> TokenIterator<'i, 'b>
    where
        'p: 'b,
    {
        TokenIterator {
            cur: self.start,
            end: self.parent.tokens.len(),
            underlying: &self.parent.tokens,
        }
    }

    /// Number of tokens remaining in this view.
    pub fn len(&self) -> usize {
        self.parent.tokens.len() - self.start
    }

    /// A narrower view that skips the first `split` tokens of this view.
    pub fn sub(&self, split: usize) -> TokensView<'i, 'p> {
        TokensView {
            parent: self.parent,
            start: self.start + split,
        }
    }
}
/// Iterator over the tokens of a `TokensView`, yielding `&Token`.
pub struct TokenIterator<'i, 'p>
where
    'i: 'p
{
    // Index of the next token to yield.
    cur: usize,
    // One past the last index to yield.
    end: usize,
    // The full token vector; iteration is bounded by `cur`/`end`.
    underlying: &'p Vec<Token<'i>>,
}
impl<'i, 'p> Iterator for TokenIterator<'i, 'p> {
    type Item = &'p Token<'i>;

    /// Yield the next token, or `None` once `end` is reached.
    fn next(&mut self) -> Option<&'p Token<'i>> {
        if self.cur >= self.end {
            None
        } else {
            let idx = self.cur;
            self.cur += 1;
            Some(&self.underlying[idx])
        }
    }
}

View File

@ -1,5 +1,89 @@
use std::io::Write;
pub mod lex;
pub mod parse;
#[cfg(test)]
mod tests {
    use super::lex::{
        TokenKind, Tokens
    };
    use super::parse::{
        Pattern, Captured,
        Stanza, EOL,
    };

    /// Truncated GMA header used as a lex/parse fixture.
    const HEADER: &'static str = "GMA
*GABRIEL_ASCIIEXPORT\t3
*MODEL_TYPE\tBasic Model
*SCENE
{
\t*SCENE_FILENAME\tfoo.max
}
";

    /// Lexing the fixture yields the exact expected token kinds, including
    /// separator tokens.
    #[test]
    fn test_lex() {
        let parsed = Tokens::parse(HEADER);
        let kinds: Vec<TokenKind> = parsed.tokens.into_iter().map(|t| t.kind).collect();
        assert_eq!(kinds, vec![
            TokenKind::Word("GMA"), TokenKind::Newline,
            TokenKind::Word("*GABRIEL_ASCIIEXPORT"), TokenKind::Tab, TokenKind::Word("3"), TokenKind::Newline,
            TokenKind::Word("*MODEL_TYPE"), TokenKind::Tab, TokenKind::Word("Basic"), TokenKind::Space, TokenKind::Word("Model"), TokenKind::Newline,
            TokenKind::Word("*SCENE"), TokenKind::Newline,
            TokenKind::Word("{"), TokenKind::Newline,
            TokenKind::Tab, TokenKind::Word("*SCENE_FILENAME"), TokenKind::Tab, TokenKind::Word("foo.max"), TokenKind::Newline,
            TokenKind::Word("}"), TokenKind::Newline,
        ]);
    }

    /// End-to-end: a flat pattern sequence consumes the whole fixture and
    /// captures the expected stanzas.
    #[test]
    fn test_parse_simple() {
        let example = Tokens::parse(HEADER);
        let view = example.view();
        let res = match vec![
            Box::new(TokenKind::Word("GMA")) as Box<dyn Pattern>,
            Box::new(TokenKind::Newline),
            Box::new(Stanza("*GABRIEL_ASCIIEXPORT", (TokenKind::Word("3"),))),
            Box::new(Stanza("*MODEL_TYPE", (EOL,))),
            // TODO blocks
            Box::new(TokenKind::Word("*SCENE")), Box::new(TokenKind::Newline),
            Box::new(TokenKind::Word("{")), Box::new(TokenKind::Newline),
            Box::new(Stanza("*SCENE_FILENAME", (EOL,))),
            Box::new(TokenKind::Word("}")), Box::new(TokenKind::Newline),
        ].matches(&view) {
            Ok(v) => v,
            Err(e) => {
                e.explain(&example, "test_txt.gmf");
                panic!("parse failed");
            }
        };
        // Every token must have been consumed.
        let rest = view.sub(res.consumed);
        assert_eq!(rest.len(), 0);
        println!("{:#?}", res.captured);
        let v = match res.captured {
            Captured::Vec(v) => v,
            _ => panic!("expected top-level vec"),
        };
        assert_eq!(v.len(), 11);
        match &v[2] {
            Captured::Stanza { key, .. } => {
                assert_eq!(*key, "*GABRIEL_ASCIIEXPORT");
            },
            // Fixed misleading ordinal: v[2] is index 2 (the 3rd element).
            _ => panic!("expected stanza at index 2"),
        }
        match &v[3] {
            Captured::Stanza { key, values, .. } => {
                assert_eq!(*key, "*MODEL_TYPE");
                // EOL captures the whole rest of the line, spaces included.
                assert_eq!(values.as_ref(), &Captured::Tuple1(Box::new(Captured::Str("Basic Model"))));
            },
            _ => panic!("expected stanza at index 3"),
        }
    }
}
#[derive(Debug, Clone)]
pub struct WriteError {
pub msg: String,

398
gmflib/src/gma/parse.rs Normal file
View File

@ -0,0 +1,398 @@
use crate::gma::lex::{
Token, TokenKind, Tokens, TokensView,
};
/// A successful pattern match: how many tokens were consumed and what was
/// captured from them.
#[derive(Debug)]
pub struct Match<'i> {
    /// Number of tokens consumed from the start of the matched view.
    pub consumed: usize,
    /// Structured data captured by the pattern.
    pub captured: Captured<'i>,
}
/// Outcome of attempting to match a pattern against a token view.
pub type MatchResult<'i> = Result<Match<'i>, MatchError>;
/// Data captured by a successful pattern match; its shape mirrors the
/// structure of the pattern that produced it.
#[derive(Debug,PartialEq,Eq)]
pub enum Captured<'i> {
    // low level
    Void,
    Vec(Vec<Captured<'i>>),
    Tuple1(Box<Captured<'i>>),
    Tuple2(Box<Captured<'i>>, Box<Captured<'i>>),
    Tuple3(Box<Captured<'i>>, Box<Captured<'i>>, Box<Captured<'i>>),
    Tuple4(Box<Captured<'i>>, Box<Captured<'i>>, Box<Captured<'i>>, Box<Captured<'i>>),
    Token(Token<'i>),
    // mid level
    U32(u32),
    // high level
    /// Borrowed slice of the input, e.g. the rest-of-line captured by `EOL`.
    Str(&'i str),
    /// A `key<whitespace>values` line as matched by the `Stanza` pattern.
    Stanza {
        /// Leading whitespace tokens (indentation), possibly empty.
        indent: Vec<Captured<'i>>,
        /// The stanza keyword, e.g. "*SCENE_FILENAME".
        key: &'i str,
        /// Captures from the stanza's value pattern.
        values: Box<Captured<'i>>,
    }
}
/// Why a pattern failed to match.
#[derive(Debug)]
pub enum MatchError {
    /// A token was present but did not satisfy the pattern.
    PatternMismatch {
        /// Index (into `Tokens::tokens`) of the offending token.
        token_offset: usize,
        /// The pattern that failed, kept for diagnostics.
        wanted: Box<dyn Pattern>,
    },
    /// Ran out of tokens while the pattern still expected more.
    EOF {
        /// Index at which the token stream ended.
        token_offset: usize,
    },
}
impl MatchError {
    /// Print a rustc-style diagnostic for this error to stdout, pointing a
    /// caret at the offending token within `tokens.underlying`.
    pub fn explain<'i>(&self, tokens: &Tokens<'i>, filename: &str) {
        let offset = *match self {
            MatchError::PatternMismatch { token_offset, ..} => token_offset,
            MatchError::EOF { token_offset } => token_offset,
        };
        let token = &tokens.tokens[offset];
        let byte_offset = token.offs;
        // Locate the line containing the token, and the byte offset within it.
        let lines: Vec<&str> = tokens.underlying.split("\n").collect();
        let mut rolling = 0usize;
        let mut line_no: Option<usize> = None;
        let mut byte_in_line: Option<usize> = None;
        for (i, line) in lines.iter().enumerate() {
            let start = rolling;
            let end = rolling + line.bytes().len();
            if start <= byte_offset && byte_offset <= end {
                line_no = Some(i);
                byte_in_line = Some(byte_offset - start);
                break;
            }
            // +1 accounts for the newline that split() consumed.
            // TODO: fix assumption of no consecutive newlines?
            rolling = end + 1;
        }
        // Every token offset lies within some line, so these are always set.
        let line_no = line_no.unwrap();
        let byte_in_line = byte_in_line.unwrap();
        // Replace all tabs with four spaces when visualizing the line, and add
        // to byte_in_line to compensate (each tab grows by 3 bytes).
        let line = lines[line_no];
        let visual = byte_in_line + lines[line_no][..byte_in_line].matches("\t").count() * 3;
        // Was a single space, which left the caret misaligned relative to the
        // `visual` computation above; must be four spaces.
        let line = line.replace("\t", "    ");
        let msg = match self {
            MatchError::PatternMismatch { wanted, .. } => format!("unexpected {}, wanted {}", token.kind.explain(), wanted.explain()),
            // Typo fix: was "end of flie".
            MatchError::EOF{ .. } => "unexpected end of file".to_string(),
        };
        println!("error: {}", msg);
        println!(" --> {}:{}:{}", filename, line_no+1, byte_in_line);
        // Gutter lines padded to align their '|' with the `{:>5} |` line.
        println!("       |");
        println!(" {:>5} | {}", line_no+1, line);
        println!("       | {}{}",
            " ".repeat(visual),
            "^".repeat(token.kind.len()));
    }
}
/// Helper that threads a `TokensView` through a sequence of sub-pattern
/// matches, accumulating the total number of tokens consumed.
struct TokenReader<'i, 'p> {
    // The remaining (not yet consumed) tokens.
    view: TokensView<'i, 'p>,
    // Tokens consumed so far by successful `expect` calls.
    consumed: usize,
}
impl<'i, 'p> TokenReader<'i, 'p> {
    /// Start reading at the beginning of `view`.
    fn new(view: &TokensView<'i, 'p>) -> Self {
        Self { view: view.sub(0), consumed: 0 }
    }

    /// Match `pattern` at the current position, advancing past the tokens it
    /// consumed on success. On failure the reader is left unchanged.
    fn expect<P: Pattern + ?Sized>(&mut self, pattern: &P) -> Result<Captured<'i>, MatchError> {
        let m = pattern.matches(&self.view)?;
        self.consumed += m.consumed;
        self.view = self.view.sub(m.consumed);
        Ok(m.captured)
    }

    /// Finish, wrapping `captured` with the total consumed count.
    fn done(self, captured: Captured<'i>) -> MatchResult<'i> {
        Ok(Match { consumed: self.consumed, captured })
    }
}
/// A grammar fragment that can be matched against a view of the token stream.
pub trait Pattern: core::fmt::Debug {
    /// Attempt to match at the start of `l`; on success report how many
    /// tokens were consumed and what was captured.
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i>;
    /// Human-readable description of the pattern, for error messages.
    fn explain(&self) -> String;
}
/// Pattern matching one or more consecutive tab/space tokens.
#[derive(Debug,Clone)]
struct Whitespace;

impl Pattern for Whitespace {
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        // Not using TokenReader here as we don't have alternatives support yet.
        let res: Vec<Captured<'i>> = l
            .iter()
            .take_while(|t| matches!(t.kind, TokenKind::Tab | TokenKind::Space))
            .map(|t| Captured::Token(t.clone()))
            .collect();
        if res.is_empty() {
            Err(MatchError::PatternMismatch {
                token_offset: l.start,
                wanted: Box::new(self.clone()),
            })
        } else {
            Ok(Match {
                consumed: res.len(),
                captured: Captured::Vec(res),
            })
        }
    }
    fn explain(&self) -> String {
        "space or tab".to_string()
    }
}
impl Pattern for TokenKind<'static> {
    /// Matches exactly one token of this kind.
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        // Not using TokenReader as this is a terminal.
        let v = match l.iter().next() {
            Some(v) => v,
            None => return Err(MatchError::EOF { token_offset: l.start }),
        };
        if v.kind == *self {
            Ok(Match {
                consumed: 1,
                captured: Captured::Token(v.clone()),
            })
        } else {
            Err(MatchError::PatternMismatch {
                token_offset: l.start,
                wanted: Box::new(self.clone()),
            })
        }
    }
    fn explain(&self) -> String {
        // Delegates to the inherent TokenKind::explain; inherent methods take
        // priority over this trait method, so this is not a recursive call.
        self.explain()
    }
}
impl Pattern for Vec<Box<dyn Pattern>> {
    /// Matches each contained pattern in order, capturing a Vec of results.
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        let mut r = TokenReader::new(l);
        let mut res: Vec<Captured<'i>> = Vec::with_capacity(self.len());
        for t in self {
            res.push(r.expect(t.as_ref())?);
        }
        r.done(Captured::Vec(res))
    }
    fn explain(&self) -> String {
        let parts: Vec<String> = self.iter().map(|el| el.explain()).collect();
        format!("sequence [{}]", parts.join(","))
    }
}
impl Pattern for () {
    /// The empty pattern: consumes nothing and always succeeds.
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        TokenReader::new(l).done(Captured::Void)
    }
    fn explain(&self) -> String {
        String::from("nothing")
    }
}
// Implements Pattern for pattern tuples (A,) .. (A, B, C, D). Elements are
// matched in order, separated by mandatory Whitespace (which is not
// captured); captures are wrapped in the matching Captured::TupleN
// constructor ($cons). $count is the tuple arity, used to skip the separator
// after the final element; $ty/$name are the per-element type parameters and
// binding names (one of each per element).
macro_rules! mktuple {
    ( $cons:ident, $count:expr, [ $($ty:ident),+ ], [ $($name:ident),+ ] ) => {
        impl <$($ty),*> Pattern for ( $($ty),+ , )
        where $( $ty : Pattern ),+
        {
            fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
                let mut r = TokenReader::new(l);
                // Counts matched elements so the last one is not followed by
                // a Whitespace separator.
                let mut i = 0;
                let ( $( $name ),+ , ) = self;
                $(
                    let $name = r.expect($name)?;
                    i += 1;
                    if i != $count {
                        r.expect(&Whitespace)?;
                    }
                )+
                r.done(Captured::$cons(
                    $(
                        Box::new($name)
                    ),+
                ))
            }
            fn explain(&self) -> String {
                let ( $( $name ),+ , ) = self;
                format!("sequence of ({})", vec![
                    $(
                        $name.explain()
                    ),+
                ].join(","))
            }
        }
    }
}
mktuple!(Tuple1, 1, [A], [a]);
mktuple!(Tuple2, 2, [A, B], [a, b]);
mktuple!(Tuple3, 3, [A, B, C], [a, b, c]);
mktuple!(Tuple4, 4, [A, B, C, D], [a, b, c, d]);
/// Pattern for one `key<ws>values<ws?><newline>` line, optionally indented.
/// The key is a fixed word; `T` is the pattern for the values.
#[derive(Debug)]
pub struct Stanza<T: Pattern>(pub &'static str, pub T);

impl<T: Pattern> Pattern for Stanza<T> {
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        let mut r = TokenReader::new(l);
        // Leading indentation is optional; a failed match consumes nothing.
        let indent = match r.expect(&Whitespace) {
            Ok(Captured::Vec(v)) => v,
            Ok(_) => unreachable!(),
            Err(_) => vec![],
        };
        // The fixed stanza keyword.
        let key = match r.expect(&TokenKind::Word(self.0))? {
            Captured::Token(Token { kind: TokenKind::Word(w), .. }) => w,
            _ => unreachable!(),
        };
        // Mandatory separator, then the value pattern.
        r.expect(&Whitespace)?;
        let values = Box::new(r.expect(&self.1)?);
        // Optional trailing whitespace before the line terminator.
        r.expect(&Whitespace).ok();
        r.expect(&TokenKind::Newline)?;
        r.done(Captured::Stanza { indent, key, values })
    }
    fn explain(&self) -> String {
        format!("stanza {} containing {}", self.0, self.1.explain())
    }
}
/// Pattern capturing everything up to (but not including) the next newline
/// as a single borrowed string. The newline itself is not consumed.
#[derive(Debug)]
pub struct EOL;

impl Pattern for EOL {
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        // Not using TokenReader as we don't have expect_not.
        // Byte range (within the underlying input) of the captured string.
        let mut start: Option<usize> = None;
        let mut end: Option<usize> = None;
        let mut consumed = 0usize;
        for t in l.iter() {
            if t.kind == TokenKind::Newline {
                end = Some(t.offs);
                break;
            }
            if start.is_none() {
                start = Some(t.offs);
            }
            consumed += 1;
        }
        // No newline found: the capture runs to the end of the input (this
        // used to panic via unwrap). An empty line (newline immediately, or
        // empty view) yields an empty string.
        let end = end.unwrap_or(l.parent.underlying.len());
        let start = start.unwrap_or(end);
        Ok(Match {
            consumed,
            captured: Captured::Str(&l.parent.underlying[start..end]),
        })
    }
    fn explain(&self) -> String {
        "string until newline".to_string()
    }
}
/// Pattern matching any single Word token, whatever its contents.
#[derive(Debug,Clone)]
pub struct Word;

impl Pattern for Word {
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        // Not using TokenReader as this is effectively (but not really) a
        // terminal, too (i.e., it's TokenKind::Word but we don't have GADTs).
        match l.iter().next() {
            None => Err(MatchError::EOF { token_offset: l.start }),
            Some(v) if matches!(v.kind, TokenKind::Word(_)) => Ok(Match {
                consumed: 1,
                captured: Captured::Token(v.clone()),
            }),
            Some(_) => Err(MatchError::PatternMismatch {
                token_offset: l.start,
                wanted: Box::new(self.clone()),
            }),
        }
    }
    fn explain(&self) -> String {
        "a word".to_string()
    }
}
/// A numeric type that can be parsed out of a lexed word for use with `Num`.
pub trait GMANum: core::fmt::Debug + Clone + 'static {
    /// Parse `s` into the appropriate Captured variant; `None` on failure.
    fn gma_parse(s: &str) -> Option<Captured<'_>>;
    /// Human-readable description of the numeric type, for error messages.
    fn explain() -> String;
}

impl GMANum for u32 {
    fn gma_parse(s: &str) -> Option<Captured<'_>> {
        s.parse::<u32>().ok().map(Captured::U32)
    }
    fn explain() -> String {
        String::from("number (unsigned, 32-bit)")
    }
}
/// Pattern matching a single Word token that parses as the number type `N`.
#[derive(Debug, Clone)]
pub struct Num<N: GMANum>(std::marker::PhantomData<N>);

impl<N: GMANum> Pattern for Num<N> {
    fn matches<'i, 'p>(&self, l: &TokensView<'i, 'p>) -> MatchResult<'i> {
        let mut r = TokenReader::new(l);
        let word = match r.expect(&Word)? {
            Captured::Token(Token { kind: TokenKind::Word(w), .. }) => w,
            _ => unreachable!(),
        };
        // The token lexed as a word, but might still not be a valid N.
        match N::gma_parse(word) {
            Some(captured) => r.done(captured),
            None => Err(MatchError::PatternMismatch {
                wanted: Box::new(self.clone()),
                token_offset: l.start,
            }),
        }
    }
    fn explain(&self) -> String {
        N::explain()
    }
}

impl<N: GMANum> Num<N> {
    /// Construct the (stateless) number pattern.
    pub fn new() -> Self {
        Self(std::marker::PhantomData)
    }
}

View File

@ -51,7 +51,7 @@ impl<const N: usize> Serializable for [f32; N] {
fn read<R: std::io::Read>(r: &mut ReadStream<R>) -> ReadResult<Self> {
let mut buffer = [0f32; N];
r.backing.read_f32_into::<byteorder::LittleEndian>(&mut buffer[..]).map_err(|e| {
r.error("eof")
r.error(format!("{:?}", e))
})?;
r.pos += 4 * N;
Ok(buffer)

View File

@ -1,6 +1,11 @@
mod gmi;
mod gma;
// TODO: unpub this once it's being used internally (pubbed to silence dead code warnings).
pub mod gma;
pub mod types;
pub use types::*;
pub use gmi::ReadError as GMIReadError;
//#[macro_use] extern crate lalrpop_util;
//
//lalrpop_mod!(pub grammar);

View File

@ -4,12 +4,11 @@ fn main() {
env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"));
let args: Vec<String> = std::env::args().collect();
if args.len() != 3 {
log::error!("Usage: {} foo.gmf foo_txt.gmf", args[0]);
if args.len() != 2 {
log::error!("Usage: {} foo.gmf", args[0]);
return;
}
let gmf_path = &args[1];
let txt_gmf_path = &args[2];
let f = match std::fs::File::open(gmf_path) {
Ok(f) => f,
@ -18,13 +17,6 @@ fn main() {
return;
},
};
let mut res = match std::fs::File::create(txt_gmf_path) {
Ok(f) => f,
Err(e) => {
log::error!("Could not write {}: {:?}", txt_gmf_path, e);
return;
},
};
let gmf = match gmflib::GMF::read_gmi(f) {
Ok(gmf) => gmf,
@ -37,7 +29,9 @@ fn main() {
return;
},
};
if let Err(e) = gmf.write_gma(&mut res) {
let mut buf: Vec<u8> = vec![];
if let Err(e) = gmf.write_gma(&mut buf) {
log::error!("Writing GMA failed: {}", e.msg);
}
}