You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
278 lines
7.9 KiB
278 lines
7.9 KiB
//
// Lex.cs
//
// Author: Kees van Spelde <sicos2002@hotmail.com>
//
// Copyright (c) 2013-2018 Magic-Sessions. (www.magic-sessions.com)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
|
|
|
|
using System;
|
|
using System.Globalization;
|
|
using System.IO;
|
|
using System.Text;
|
|
|
|
namespace MsgReader.Rtf
{
    /// <summary>
    /// Rtf lexer; splits the characters of a <see cref="TextReader"/> into <see cref="Token"/>s
    /// (group start, group end, keywords, control symbols and plain text)
    /// </summary>
    internal class Lex
    {
        #region Fields
        /// <summary>
        /// Value returned by the reader's Read and Peek methods when no more characters are available
        /// </summary>
        private const int Eof = -1;

        /// <summary>
        /// The reader that supplies the raw RTF characters
        /// </summary>
        private readonly TextReader _reader;
        #endregion

        #region Constructor
        /// <summary>
        /// Initialize instance
        /// </summary>
        /// <param name="reader">The reader to take the RTF characters from</param>
        /// <exception cref="ArgumentNullException">Raised when <paramref name="reader"/> is <c>null</c></exception>
        public Lex(TextReader reader)
        {
            if (reader == null)
                throw new ArgumentNullException(nameof(reader));

            _reader = reader;
        }
        #endregion

        #region PeekTokenType
        /// <summary>
        /// Peek to see what kind of token comes next. Skippable characters (CR, LF, TAB, NUL)
        /// in front of the token are consumed, the first character of the token itself is not.
        /// </summary>
        /// <returns>
        /// <see cref="RtfTokenType.Eof"/> at the end of the input, <see cref="RtfTokenType.GroupStart"/> for '{',
        /// <see cref="RtfTokenType.GroupEnd"/> for '}', <see cref="RtfTokenType.Control"/> for a backslash
        /// and <see cref="RtfTokenType.Text"/> for anything else
        /// </returns>
        public RtfTokenType PeekTokenType()
        {
            switch (ClearWhiteSpace())
            {
                case Eof:
                    return RtfTokenType.Eof;

                case '{':
                    return RtfTokenType.GroupStart;

                case '}':
                    return RtfTokenType.GroupEnd;

                case '\\':
                    return RtfTokenType.Control;

                default:
                    return RtfTokenType.Text;
            }
        }
        #endregion

        #region NextToken
        /// <summary>
        /// Read next token
        /// </summary>
        /// <returns>
        /// The token that was read; its type is <see cref="RtfTokenType.Eof"/> when the input is exhausted
        /// </returns>
        public Token NextToken()
        {
            var token = new Token();

            // CR, LF, TAB and NUL carry no meaning between tokens; a space does and is NOT skipped
            var c = _reader.Read();
            while (IsSkippedChar(c))
                c = _reader.Read();

            switch (c)
            {
                case Eof:
                    token.Type = RtfTokenType.Eof;
                    break;

                case '{':
                    token.Type = RtfTokenType.GroupStart;
                    break;

                case '}':
                    token.Type = RtfTokenType.GroupEnd;
                    break;

                case '\\':
                    ParseKeyword(token);
                    break;

                default:
                    token.Type = RtfTokenType.Text;
                    ParseText(c, token);
                    break;
            }

            return token;
        }
        #endregion

        #region ParseKeyword
        /// <summary>
        /// Parse whatever follows a backslash (that has already been consumed) into <paramref name="token"/>:
        /// a keyword with an optional numeric parameter, an extended keyword ("\*\keyword"),
        /// an escaped special character or a control symbol such as the "\'hh" hex escape
        /// </summary>
        /// <param name="token">The token to fill</param>
        private void ParseKeyword(Token token)
        {
            var ext = false;
            var c = _reader.Peek();

            if (c == Eof)
            {
                // A lone backslash at the very end of the input; there is nothing left to turn into a token
                token.Type = RtfTokenType.Eof;
                return;
            }

            if (!char.IsLetter((char) c))
            {
                _reader.Read();

                if (c != '*')
                {
                    ParseControlSymbol(c, token);
                    return;
                }

                // Extended keyword: the asterisk has been consumed above, now also consume the
                // character behind it (expected to be the backslash of the keyword that follows)
                _reader.Read();
                ext = true;
            }

            // Read keyword
            var keyword = new StringBuilder();
            c = _reader.Peek();

            while (char.IsLetter((char) c))
            {
                _reader.Read();
                keyword.Append((char) c);
                c = _reader.Peek();
            }

            token.Type = ext ? RtfTokenType.ExtKeyword : RtfTokenType.Keyword;
            token.Key = keyword.ToString();

            // Read an optional integer parameter. Only ASCII digits are accepted because these are
            // the only digits the integer parser understands.
            if (IsAsciiDigit(c) || c == '-')
            {
                var text = new StringBuilder();

                if (c == '-')
                {
                    text.Append('-');
                    _reader.Read();
                    c = _reader.Peek();
                }

                while (IsAsciiDigit(c))
                {
                    _reader.Read();
                    text.Append((char) c);
                    c = _reader.Peek();
                }

                // A hyphen without digits or a value that does not fit in an Int32 is malformed input;
                // in that case the token is returned without a parameter instead of raising an exception
                int param;
                if (int.TryParse(text.ToString(), NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture,
                    out param))
                {
                    token.HasParam = true;
                    token.Param = param;
                }
            }

            // A single space directly behind the keyword only delimits it and is therefore consumed
            if (c == ' ')
                _reader.Read();
        }

        /// <summary>
        /// Fill <paramref name="token"/> for a backslash that is followed by the non letter
        /// character <paramref name="c"/> (that has already been consumed)
        /// </summary>
        /// <param name="c">The character that followed the backslash</param>
        /// <param name="token">The token to fill</param>
        private void ParseControlSymbol(int c, Token token)
        {
            token.Key = ((char) c).ToString(CultureInfo.InvariantCulture);

            if (c == '\\' || c == '{' || c == '}')
            {
                // Special character; an escaped backslash or brace is plain text
                token.Type = RtfTokenType.Text;
                return;
            }

            token.Type = RtfTokenType.Control;

            if (c != '\'')
                return;

            // Read up to 2 hex characters. Peek first so that a character that is not a hex digit
            // (for example a closing brace or the end of the input) is never swallowed.
            var text = new StringBuilder(2);
            while (text.Length < 2 && IsHexDigit(_reader.Peek()))
                text.Append((char) _reader.Read());

            var hex = text.ToString().ToLowerInvariant();

            // Without any hex digit the token is left without a parameter instead of raising an exception
            int value;
            if (int.TryParse(hex, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out value))
            {
                token.HasParam = true;
                token.Hex = hex;
                token.Param = value;
            }
        }
        #endregion

        #region ParseText
        /// <summary>
        /// Parse text after char; collects characters into the key of <paramref name="token"/> until a
        /// backslash, a brace or the end of the input is found. CR, LF, TAB and NUL are left out of the text.
        /// </summary>
        /// <param name="c">The first character of the text (already consumed)</param>
        /// <param name="token">The token that receives the text</param>
        private void ParseText(int c, Token token)
        {
            var text = new StringBuilder();
            text.Append((char) c);

            var next = ClearWhiteSpace();

            while (next != '\\' && next != '{' && next != '}' && next != Eof)
            {
                _reader.Read();
                text.Append((char) next);
                next = ClearWhiteSpace();
            }

            token.Key = text.ToString();
        }

        /// <summary>
        /// Consume CR, LF, TAB and NUL characters until a different character (or the end of the input) is found
        /// </summary>
        /// <returns>The first character that is not skipped (it is peeked, not consumed) or -1 at the end of the input</returns>
        private int ClearWhiteSpace()
        {
            var c = _reader.Peek();

            while (IsSkippedChar(c))
            {
                _reader.Read();
                c = _reader.Peek();
            }

            return c;
        }
        #endregion

        #region Character helpers
        /// <summary>
        /// Returns <c>true</c> when <paramref name="c"/> is a character the lexer skips (CR, LF, TAB or NUL)
        /// </summary>
        /// <param name="c">The character as returned by the reader</param>
        private static bool IsSkippedChar(int c)
        {
            return c == '\r' || c == '\n' || c == '\t' || c == '\0';
        }

        /// <summary>
        /// Returns <c>true</c> when <paramref name="c"/> is one of the ASCII digits 0-9
        /// </summary>
        /// <param name="c">The character as returned by the reader</param>
        private static bool IsAsciiDigit(int c)
        {
            return c >= '0' && c <= '9';
        }

        /// <summary>
        /// Returns <c>true</c> when <paramref name="c"/> is an ASCII hexadecimal digit (0-9, a-f or A-F)
        /// </summary>
        /// <param name="c">The character as returned by the reader</param>
        private static bool IsHexDigit(int c)
        {
            return IsAsciiDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
        }
        #endregion
    }
}