From 7508d611016fc1fc8852e2c24e46a2ba80e964f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A9=BA=E7=99=BD?= <3440771474@qq.com>
Date: Tue, 15 Apr 2025 10:17:22 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20pbtxt=E8=A7=A3=E6=9E=90part1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 尹倩
---
 .../rust/parser/src/reader/protobuf/mod.rs    |  56 ++
 .../rust/parser/src/reader/protobuf/text.rs   | 569 ++++++++++++++++++
 2 files changed, 625 insertions(+)
 create mode 100644 plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/mod.rs
 create mode 100644 plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/text.rs

diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/mod.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/mod.rs
new file mode 100644
index 0000000000..79901de06c
--- /dev/null
+++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/mod.rs
@@ -0,0 +1,56 @@
+use std::{error::Error, io, num, result, str};
+
+mod text;
+pub use text::*;
+
+/// Errors produced while reading protobuf text-format input.
+///
+/// Every variant except [`ParseError::Io`] carries the `line` and `column` at which the
+/// error was raised.
+#[derive(Debug, thiserror::Error)]
+pub enum ParseError {
+    #[error("Syntax error: {detail}")]
+    Syntax { line: u32, column: u32, detail: String },
+
+    #[error("String is too short")]
+    StringTooShort { line: u32, column: u32 },
+
+    #[error("String is not in quotes")]
+    StringNotInQuotes { line: u32, column: u32 },
+
+    #[error("String quotes do not match")]
+    StringQuotesUnMatch { line: u32, column: u32 },
+
+    #[error("Could not parse integer: {val}")]
+    CouldntParseInt { val: String, line: u32, column: u32 },
+
+    #[error("Could not parse float: {val}")]
+    CouldntParseFloat { val: String, line: u32, column: u32 },
+
+    #[error("Couldn't parse enum '{val}'")]
+    CouldntParseEnum { val: String, line: u32, column: u32 },
+
+    #[error("IO error: {0}")]
+    Io(#[from] io::Error),
+
+    #[error("Bad Unicode")]
+    Utf8Error { line: u32, column: u32 },
+
+    #[error("Couldn't parse {expected} '{actual}'")]
+    MismatchedType { expected: String, actual: String, line: u32, column: u32 },
+
+    #[error("Unknown field: {tag}")]
+    UnknownField { tag: String, line: u32, column: u32 },
+
+    #[error("Unexpected token {token}")]
+    UnexpectedToken { token: String, line: u32, column: u32 },
+
+    #[error("Invalid depth")]
+    InvalidDepth { line: u32, column: u32 },
+
+    #[error("Unexpected Eof")]
+    UnexpectedEof { line: u32, column: u32 },
+}
+
+/// Result alias used by the protobuf readers, with [`ParseError`] as the error type.
+pub type Result<T> = result::Result<T, ParseError>;
diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/text.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/text.rs
new file mode 100644
index 0000000000..68326235f6
--- /dev/null
+++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/reader/protobuf/text.rs
@@ -0,0 +1,569 @@
+use std::str::FromStr;
+
+use super::{ParseError, Result};
+
+// Sentinel stored in `current_token` once the input is exhausted.
+// NOTE(review): an identifier spelled `eof` in the input is indistinguishable from it.
+const EOF: &str = "eof";
+
+/// Reader over protobuf text-format (`pbtxt`) bytes.
+///
+/// The reader holds one token of lookahead in `current_token`; the typed value readers
+/// interpret that token and then advance to the next one.
+pub struct TextReader<'a> {
+    input: &'a [u8],
+    pos: usize,
+    current_token: &'a str,
+    depth: i32,
+    array_depth: u32,
+    line: u32,
+    column: u32,
+}
+
+/// Constructors for [`ParseError`] values stamped with the reader's current position.
+impl<'a> TextReader<'a> {
+    fn syntax_error(&self, detail: String) -> ParseError {
+        ParseError::Syntax { line: self.line, column: self.column, detail }
+    }
+
+    fn string_too_short(&self) -> ParseError {
+        ParseError::StringTooShort { line: self.line, column: self.column }
+    }
+
+    fn string_not_in_quotes(&self) -> ParseError {
+        ParseError::StringNotInQuotes { line: self.line, column: self.column }
+    }
+
+    fn string_quotes_unmatch(&self) -> ParseError {
+        ParseError::StringQuotesUnMatch { line: self.line, column: self.column }
+    }
+
+    fn couldnt_parse_int(&self, val: &str) -> ParseError {
+        ParseError::CouldntParseInt { val: String::from(val), line: self.line, column: self.column }
+    }
+
+    fn couldnt_parse_float(&self, val: &str) -> ParseError {
+        ParseError::CouldntParseFloat {
+            val: String::from(val),
+            line: self.line,
+            column: self.column,
+        }
+    }
+
+    fn couldnt_parse_enum(&self, val: &str) -> ParseError {
+        ParseError::CouldntParseEnum {
+            val: String::from(val),
+            line: self.line,
+            column: self.column,
+        }
+    }
+
+    fn mismatched_type(&self, expected: &str, actual: &str) -> ParseError {
+        ParseError::MismatchedType {
+            expected: String::from(expected),
+            actual: String::from(actual),
+            line: self.line,
+            column: self.column,
+        }
+    }
+
+    fn utf8_error(&self) -> ParseError {
+        ParseError::Utf8Error { line: self.line, column: self.column }
+    }
+
+    /// Builds a [`ParseError::UnknownField`] for `tag` at the current position.
+    pub fn unknown_field(&self, tag: &str) -> ParseError {
+        ParseError::UnknownField { tag: String::from(tag), line: self.line, column: self.column }
+    }
+
+    fn unexpected_token(&self) -> ParseError {
+        ParseError::UnexpectedToken {
+            token: String::from(self.current_token),
+            line: self.line,
+            column: self.column,
+        }
+    }
+
+    fn invalid_depth(&self) -> ParseError {
+        ParseError::InvalidDepth { line: self.line, column: self.column }
+    }
+
+    fn unexpected_eof(&self) -> ParseError {
+        ParseError::UnexpectedEof { line: self.line, column: self.column }
+    }
+}
+
+/// Cursor primitives over the raw input bytes.
+impl<'a> TextReader<'a> {
+    // Byte under the cursor; indexing panics if the cursor is at the end of the input.
+    #[inline]
+    fn current_byte(&self) -> u8 {
+        self.input[self.pos]
+    }
+
+    // Whether any unread bytes remain.
+    #[inline]
+    fn has_more(&self) -> bool {
+        self.pos < self.input.len()
+    }
+
+    // Steps over one byte on the current line.
+    #[inline]
+    fn advance(&mut self) {
+        self.pos += 1;
+        self.column += 1;
+    }
+
+    // Steps over a line break: next line, column reset to 1.
+    #[inline]
+    fn new_line(&mut self) {
+        self.pos += 1;
+        self.line += 1;
+        self.column = 1;
+    }
+}
+
+/// Message structure and tokenizer.
+impl<'a> TextReader<'a> {
+    /// Enters a message scope.
+    ///
+    /// When a scope is already open the nested message's opening `{` is consumed first.
+    /// A failure at that point is discarded because this method returns nothing; the
+    /// reader is then left on the unexpected token.
+    pub fn start(&mut self) {
+        if self.depth > 0 {
+            // A nested message opens with `{`, not `}`.
+            let _ = self.expect("{");
+        }
+        self.depth += 1;
+    }
+
+    /// Tests whether the current message scope is finished.
+    ///
+    /// Returns `Ok(true)` after consuming a closing `}` and an optional `;`, or when the
+    /// input is exhausted while only the outermost scope is open. Returns `Ok(false)`
+    /// otherwise, leaving the current token untouched.
+    ///
+    /// # Errors
+    ///
+    /// * [`ParseError::InvalidDepth`] if no scope is open.
+    /// * [`ParseError::UnexpectedEof`] if the input ends inside a nested scope.
+    /// * Any tokenizer error raised while stepping past the `}`.
+    pub fn end(&mut self) -> Result<bool> {
+        if self.depth <= 0 {
+            return Err(self.invalid_depth());
+        }
+
+        if self.current_token == "}" {
+            self.expect("}")?;
+            self.match_token(";");
+            self.depth -= 1;
+            return Ok(true);
+        }
+
+        if self.current_token == EOF {
+            if self.depth != 1 {
+                return Err(self.unexpected_eof());
+            }
+            self.depth -= 1;
+            return Ok(true);
+        }
+
+        Ok(false)
+    }
+
+    // Scans the next token into `current_token`, or stores `EOF` at end of input.
+    // On failure the offending byte is left unconsumed.
+    // NOTE(review): numeric literals and quoted strings are not tokenized yet; their
+    // first byte currently produces a syntax error.
+    fn next_token(&mut self) -> Result<()> {
+        self.skip_whitespace()?;
+
+        if !self.has_more() {
+            self.current_token = EOF;
+            return Ok(());
+        }
+
+        let byte = self.current_byte();
+        match byte {
+            b'a'..=b'z' | b'A'..=b'Z' | b'_' => self.parse_identifier(),
+            // `,` and `;` are tokens because `array` and `semicolon` match against them.
+            b':' | b'{' | b'}' | b'[' | b']' | b'=' | b',' | b';' => self.parse_symbol(),
+            // `escape_ascii` prints the character itself rather than its numeric code.
+            _ => Err(self.syntax_error(format!(
+                "Unexpected character '{}'",
+                byte.escape_ascii()
+            ))),
+        }
+    }
+
+    /// Returns the current token as a field name and steps past it.
+    ///
+    /// The `:` separator is required and consumed unless the name is directly followed
+    /// by `[` or `{`.
+    ///
+    /// # Errors
+    ///
+    /// Returns a tokenizer error, or [`ParseError::Syntax`] when the separator is missing.
+    pub fn tag(&mut self) -> Result<&'a str> {
+        let name = self.current_token;
+        self.next_token()?;
+        if !matches!(self.current_token, "[" | "{") {
+            self.expect(":")?;
+        }
+        Ok(name)
+    }
+
+    // Skips blanks, line breaks and `#` comments in front of the next token.
+    fn skip_whitespace(&mut self) -> Result<()> {
+        while self.has_more() {
+            match self.current_byte() {
+                // `\r` is skipped so that CRLF input tokenizes like LF input.
+                b' ' | b'\t' | b'\r' => self.advance(),
+                b'\n' => self.new_line(),
+                b'#' => self.skip_comment()?,
+                _ => break,
+            }
+        }
+        Ok(())
+    }
+
+    // Skips to the end of the current line; the `\n` itself is left for `skip_whitespace`.
+    fn skip_comment(&mut self) -> Result<()> {
+        while self.has_more() && self.current_byte() != b'\n' {
+            self.advance();
+        }
+        Ok(())
+    }
+
+    // Requires the current token to equal `s`, then moves on to the next token.
+    fn expect(&mut self, s: &str) -> Result<()> {
+        if self.current_token != s {
+            return Err(
+                self.syntax_error(format!("Unexpected '{}' instead of '{s}'", self.current_token))
+            );
+        }
+
+        self.next_token()
+    }
+
+    // Consumes the current token when it equals `val` and reports whether it matched.
+    // A tokenizer error raised while advancing is dropped; `next_token` leaves the
+    // offending byte in place, so the next `next_token` call fails on it again.
+    fn match_token(&mut self, val: &str) -> bool {
+        if self.current_token == val {
+            let _ = self.next_token();
+            return true;
+        }
+
+        false
+    }
+
+    // Consumes an optional `;`.
+    fn semicolon(&mut self) {
+        self.match_token(";");
+    }
+
+    // Takes the single ASCII byte under the cursor as the token, borrowing it from the
+    // input instead of leaking a heap allocation per symbol.
+    fn parse_symbol(&mut self) -> Result<()> {
+        let start = self.pos;
+        self.advance();
+
+        let input: &'a [u8] = self.input;
+        let symbol = std::str::from_utf8(&input[start..self.pos]).map_err(|_| self.utf8_error())?;
+
+        self.current_token = symbol;
+
+        Ok(())
+    }
+
+    // Scans an identifier: a letter or `_`, then any run of letters, digits, `_` or `.`.
+    fn parse_identifier(&mut self) -> Result<()> {
+        let start = self.pos;
+
+        let first_byte = self.current_byte();
+        if !first_byte.is_ascii_alphabetic() && first_byte != b'_' {
+            return Err(self.syntax_error(format!(
+                "Identifier must start with alphabetic or underscore, found '{}'",
+                first_byte.escape_ascii()
+            )));
+        }
+
+        self.advance();
+
+        while self.has_more() {
+            let byte = self.current_byte();
+            if !(byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'.') {
+                break;
+            }
+            self.advance();
+        }
+
+        let input: &'a [u8] = self.input;
+        let identifier =
+            std::str::from_utf8(&input[start..self.pos]).map_err(|_| self.utf8_error())?;
+
+        self.current_token = identifier;
+
+        Ok(())
+    }
+}
+
+// Generates an integer reader: parses the current token as `$ty`, then advances and
+// consumes an optional `;`.
+macro_rules! impl_int {
+    ($name:ident, $ty:ty) => {
+        pub fn $name(&mut self) -> super::Result<$ty> {
+            let token = self.current_token;
+            let ret = <$ty>::from_str(token).map_err(|_| self.couldnt_parse_int(token))?;
+
+            self.next_token()?;
+            self.semicolon();
+
+            Ok(ret)
+        }
+    };
+}
+
+// Generates a float reader: accepts `nan`, `inf` and `-inf`, otherwise parses the token
+// as `$ty` after dropping one trailing `f`, then advances and consumes an optional `;`.
+macro_rules! impl_float {
+    ($name:ident, $ty:ty) => {
+        pub fn $name(&mut self) -> super::Result<$ty> {
+            let token = self.current_token;
+            let ret = match token {
+                "nan" => <$ty>::NAN,
+                "inf" => <$ty>::INFINITY,
+                "-inf" => <$ty>::NEG_INFINITY,
+                _ => {
+                    let digits = token.strip_suffix('f').unwrap_or(token);
+                    <$ty>::from_str(digits).map_err(|_| self.couldnt_parse_float(digits))?
+                }
+            };
+
+            self.next_token()?;
+            self.semicolon();
+
+            Ok(ret)
+        }
+    };
+}
+
+/// Conversions that [`TextReader::enumeration`] uses to build an enum value.
+pub trait Enumeration: Sized {
+    /// Maps the token text to a variant, if any matches.
+    fn from_str(value: &str) -> Option<Self>;
+    /// Maps a numeric value to a variant, if any matches.
+    fn from_i32(value: i32) -> Option<Self>;
+}
+
+/// Typed scalar readers. Each one interprets the current token, advances to the next
+/// token and consumes an optional trailing `;`.
+impl<'a> TextReader<'a> {
+    impl_float!(float, f32);
+
+    impl_float!(double, f64);
+
+    impl_int!(int32, i32);
+
+    impl_int!(int64, i64);
+
+    impl_int!(uint32, u32);
+
+    impl_int!(uint64, u64);
+
+    /// Delegates to [`Self::int32`].
+    #[inline]
+    pub fn sint32(&mut self) -> Result<i32> {
+        self.int32()
+    }
+
+    /// Delegates to [`Self::int64`].
+    #[inline]
+    pub fn sint64(&mut self) -> Result<i64> {
+        self.int64()
+    }
+
+    /// Delegates to [`Self::uint32`].
+    #[inline]
+    pub fn fixed32(&mut self) -> Result<u32> {
+        self.uint32()
+    }
+
+    /// Delegates to [`Self::uint64`].
+    #[inline]
+    pub fn fixed64(&mut self) -> Result<u64> {
+        self.uint64()
+    }
+
+    /// Delegates to [`Self::int32`].
+    #[inline]
+    pub fn sfixed32(&mut self) -> Result<i32> {
+        self.int32()
+    }
+
+    /// Delegates to [`Self::int64`].
+    #[inline]
+    pub fn sfixed64(&mut self) -> Result<i64> {
+        self.int64()
+    }
+
+    /// Reads a quoted string and returns the text between the quotes, borrowed from the
+    /// input. Escape sequences are returned exactly as written.
+    ///
+    /// # Errors
+    ///
+    /// [`ParseError::StringTooShort`], [`ParseError::StringNotInQuotes`] or
+    /// [`ParseError::StringQuotesUnMatch`] when the token is not a quoted string.
+    pub fn string(&mut self) -> Result<&'a str> {
+        let token = self.current_token;
+
+        if token.len() < 2 {
+            return Err(self.string_too_short());
+        }
+
+        let quote = if token.starts_with('\'') {
+            '\''
+        } else if token.starts_with('"') {
+            '"'
+        } else {
+            return Err(self.string_not_in_quotes());
+        };
+
+        if !token.ends_with(quote) {
+            return Err(self.string_quotes_unmatch());
+        }
+
+        let value = &token[1..token.len() - 1];
+        self.next_token()?;
+        self.semicolon();
+
+        Ok(value)
+    }
+
+    /// Reads a boolean written as `true`, `True`, `1`, `false`, `False` or `0`.
+    ///
+    /// # Errors
+    ///
+    /// [`ParseError::MismatchedType`] for any other token.
+    pub fn bool(&mut self) -> Result<bool> {
+        let token = self.current_token;
+        let value = match token {
+            "true" | "True" | "1" => true,
+            "false" | "False" | "0" => false,
+            _ => return Err(self.mismatched_type("bool", token)),
+        };
+
+        self.next_token()?;
+        self.semicolon();
+
+        Ok(value)
+    }
+
+    /// Reads an enum value given either by name or by its `i32` number; the name lookup
+    /// is tried first.
+    ///
+    /// # Errors
+    ///
+    /// [`ParseError::CouldntParseEnum`] when neither lookup yields a variant.
+    pub fn enumeration<T: Enumeration>(&mut self) -> Result<T> {
+        let token = self.current_token;
+
+        let variant = T::from_str(token)
+            .or_else(|| token.parse::<i32>().ok().and_then(T::from_i32))
+            .ok_or_else(|| self.couldnt_parse_enum(token))?;
+
+        self.next_token()?;
+        self.semicolon();
+
+        Ok(variant)
+    }
+
+    /// Reads a quoted string and copies its raw bytes; escapes are not decoded.
+    pub fn bytes(&mut self) -> Result<Vec<u8>> {
+        Ok(self.string()?.as_bytes().to_vec())
+    }
+}
+
+/// Types that can be read as a single value from a [`TextReader`].
+pub trait FromText<'a>: Sized {
+    /// Reads one value starting at the reader's current token.
+    fn from_text(reader: &mut TextReader<'a>) -> Result<Self>;
+}
+
+/// Implements [`FromText`] for `$ty` by delegating to the `TextReader` method `$func`.
+#[macro_export]
+macro_rules! impl_from_text {
+    ($ty: ty, $func: ident) => {
+        impl FromText<'_> for $ty {
+            fn from_text(reader: &mut TextReader<'_>) -> Result<Self> {
+                reader.$func()
+            }
+        }
+    };
+}
+
+impl_from_text!(i32, int32);
+impl_from_text!(i64, int64);
+impl_from_text!(u32, uint32);
+impl_from_text!(u64, uint64);
+impl_from_text!(f32, float);
+impl_from_text!(f64, double);
+impl_from_text!(bool, bool);
+impl_from_text!(Vec<u8>, bytes);
+
+impl<'a> FromText<'a> for &'a str {
+    fn from_text(reader: &mut TextReader<'a>) -> Result<Self> {
+        reader.string()
+    }
+}
+
+/// Repeated-field support.
+impl<'a> TextReader<'a> {
+    // Consumes an opening `[` if present.
+    fn first(&mut self) -> bool {
+        if self.match_token("[") {
+            self.array_depth += 1;
+            return true;
+        }
+
+        false
+    }
+
+    // Consumes a closing `]` if present.
+    fn last(&mut self) -> bool {
+        if self.match_token("]") {
+            self.array_depth -= 1;
+            return true;
+        }
+
+        false
+    }
+
+    /// Appends the value(s) of a repeated field to `collection`.
+    ///
+    /// Accepts either a single value or a bracketed list `[a, b, ...]`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates element read errors, and returns [`ParseError::UnexpectedToken`] when a
+    /// list element is followed by something other than `,` or `]`.
+    pub fn array<T: FromText<'a>>(&mut self, collection: &mut Vec<T>) -> Result<()> {
+        if !self.first() {
+            collection.push(T::from_text(self)?);
+            return Ok(());
+        }
+
+        while !self.last() {
+            collection.push(T::from_text(self)?);
+            match self.current_token {
+                "," => self.next_token()?,
+                "]" => {}
+                _ => return Err(self.unexpected_token()),
+            }
+        }
+
+        Ok(())
+    }
+}
-- 
Gitee