Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //====================================================================
- // File: XLex.h
- // Author: Shawn Presser
- // Contact: shawnpresser@gmail.com
- // Date: 11-13-08
- //
- // Purpose: To separate an ASCII .x file into a set of tokens.
- //
- // Copyright (c) 2008 Shawn Presser
- //
- // Permission is hereby granted, free of charge, to any person
- // obtaining a copy of this software and associated documentation
- // files (the "Software"), to deal in the Software without
- // restriction, including without limitation the rights to use,
- // copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the
- // Software is furnished to do so, subject to the following
- // conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- // OTHER DEALINGS IN THE SOFTWARE.
- //====================================================================
- #pragma once
- // std c++ headers.
- #include <list>
- // forward declarations.
- struct XToken;
- enum ETokenType;
- //====================================================================
- // XLex
- //====================================================================
- class XLex
- {
- public:
- // lexes a .X file that resides in memory.
- XLex( const char* file, size_t fileSize );
- ~XLex();
- // returns the list of X file tokens.
- XToken* GetTokens() const { return _head; }
- private:
- typedef std::list< XToken* > TokenContainer;
- // generates one token.
- bool Tokenize();
- // constructs a new token and finalizes any previous token.
- bool AddToken( ETokenType type, unsigned int length, unsigned int advance );
- // allocates a new token.
- XToken* AllocateToken();
- // searches for a character and returns its position (relative to
- // '_at'). If the end of the file is reached, the position of the
- // last character in the file is returned.
- unsigned int Find( char c, const char* start ) const;
- // returns true if the pointer points to a float (rather than an int).
- bool IsFloat( const char* pos ) const;
- // store a list of token arrays to be freed on destruction.
- TokenContainer _tokenPools;
- // the current token pool from which to pull new tokens.
- XToken* _tokenPool;
- unsigned int _tokenPoolIdx;
- // the result token list.
- XToken* _head;
- XToken* _last;
- // the file data to lex.
- const char* _fileStart;
- const char* _fileEnd;
- const char* _at;
- // the current line number.
- unsigned int _lineNumber;
- // the number of result tokens.
- unsigned int _tokenCount;
- };
- //====================================================================
- // File: XLex.cpp
- // Author: Shawn Presser
- // Contact: shawnpresser@gmail.com
- // Date: 11-13-08
- //
- // Copyright (c) 2008 Shawn Presser
- //
- // Permission is hereby granted, free of charge, to any person
- // obtaining a copy of this software and associated documentation
- // files (the "Software"), to deal in the Software without
- // restriction, including without limitation the rights to use,
- // copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the
- // Software is furnished to do so, subject to the following
- // conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- // OTHER DEALINGS IN THE SOFTWARE.
- //====================================================================
- // module header.
- #include "XLex.h"
- // std c headers.
- #include <ctype.h> // isdigit
- #include <assert.h> // assert
- // project headers.
- #include "XToken.h"
- // constants.
- #define TOKEN_POOL_SIZE 20480
- //====================================================================
- // XLex
- //====================================================================
- //--------------------------------------------------------------------
- XLex::XLex( const char* file, size_t fileSize )
- : _tokenPool( 0 )
- , _tokenPoolIdx( 0 )
- , _head( 0 )
- , _last( 0 )
- , _fileStart( file )
- , _fileEnd( file + fileSize )
- , _at( file )
- , _lineNumber( 0 )
- , _tokenCount( 0 )
- {
- // allocate an initial token pool to pull new tokens from.
- _tokenPool = new XToken[ TOKEN_POOL_SIZE ];
- _tokenPools.push_back( _tokenPool );
- // only tokenize if the file actually has data.
- if( fileSize > 0 )
- {
- // tokenize the entire file.
- while ( Tokenize() )
- {
- }
- }
- }
- //--------------------------------------------------------------------
- XLex::~XLex()
- {
- // delete each token pool.
- for ( TokenContainer::iterator it = _tokenPools.begin(); it != _tokenPools.end(); ++it )
- {
- XToken* tokenPool = *it;
- delete [] tokenPool;
- }
- }
- //--------------------------------------------------------------------
- bool
- XLex::Tokenize()
- {
- // if the current character is whitespace, just skip it.
- while ( isspace( *_at ) )
- {
- // if the current character is a newline, increment our
- // line number counter.
- if ( *_at == '\n' )
- {
- ++_lineNumber;
- }
- ++_at;
- // if we've reached the end of the file, stop.
- if ( _at >= _fileEnd )
- {
- return false;
- }
- }
- // if the current character is a negation sign (-), get a pointer to the
- // next character.
- const char* num = _at;
- if ( *num == '-' )
- {
- ++num;
- // if we've reached the end of the file, stop.
- if ( _at >= _fileEnd )
- {
- return false;
- }
- }
- // test whether the current character is a number.
- if ( isdigit( *num ) )
- {
- // if it is, then determine whether the number is a float.
- if ( IsFloat( num ) )
- {
- // find the end of the float by iterating until we run into
- // a non-digit, with the exception of a decimal point, which
- // we skip once.
- bool skipDecimal = true;
- const char* end;
- for ( end = num; end < _fileEnd; ++end )
- {
- if ( !isdigit( *end ) )
- {
- // if the current character isn't a decimal, we've found
- // the end of the float.
- if ( *end != '.' )
- {
- break;
- }
- // otherwise, if we already skipped one decimal, then this
- // second decimal is the end of the float.
- if ( !skipDecimal )
- {
- break;
- }
- // the current character is the first decimal we've run into,
- // so indicate that we shouldn't skip any more decimals.
- skipDecimal = false;
- }
- }
- unsigned int length = end - _at;
- // create a float token.
- return AddToken( TT_FLOAT, length, length );
- }
- else
- {
- // find the end of the integer.
- const char* end;
- for ( end = num; end < _fileEnd; ++end )
- {
- if ( !isdigit( *end ) )
- {
- break;
- }
- }
- unsigned int length = end - _at;
- // create an integer token.
- return AddToken( TT_INTEGER, length, length );
- }
- }
- // if the current character is one of the single-letter tokens,
- // then tokenize it.
- switch ( *_at )
- {
- case '{': return AddToken( TT_OPEN_BRACE, 1, 1 );
- case '}': return AddToken( TT_CLOSE_BRACE, 1, 1 );
- case '[': return AddToken( TT_OPEN_BRACKET, 1, 1 );
- case ']': return AddToken( TT_CLOSE_BRACKET, 1, 1 );
- case ',': return AddToken( TT_COMMA, 1, 1 );
- case ';': return AddToken( TT_SEMICOLON, 1, 1 );
- }
- // if the current character is a quote, then create a string token.
- if ( *_at == '"' )
- {
- // advance past the opening quote.
- ++_at;
- // find the position of the end quote.
- unsigned int length = Find( '"', _at );
- // create a string token, advancing the file pointer past the
- // close quote.
- return AddToken( TT_STRING, length, length+1 );
- }
- // if the current character is a <, then create a uuid token.
- if ( *_at == '<' )
- {
- // advance past the opening symbol.
- ++_at;
- // find the position of the closing symbol.
- unsigned int length = Find( '>', _at );
- // create a uuid token, advancing the file pointer one past the
- // closing symbol.
- return AddToken( TT_UUID, length, length+1 );
- }
- // if the current character is a letter or an underscore, parse an
- // identifier.
- if ( isalpha( *_at ) || *_at == '_' )
- {
- // find the end of the identifier by searching for the first
- // non-alpha-numeric character that also isn't an underscore.
- const char* end;
- for ( end = _at+1; end < _fileEnd; ++end )
- {
- if ( !isalpha( *end ) && !isdigit( *end ) && *end != '_' )
- {
- break;
- }
- }
- unsigned int length = end - _at;
- // create an identifier token..
- return AddToken( TT_IDENTIFIER, length, length );
- }
- // if the file pointer is pointing to a pound sign or a double forward
- // slash (//), then it is a line comment, so skip the rest of the line.
- if ( *_at == '#' || ( ( _fileEnd - _at ) >= 2 && ( _at[0] == '/' ) && ( _at[1] == '/' ) ) )
- {
- // advance the file pointer to the end of the line.
- while ( *_at != '\n' )
- {
- ++_at;
- // if we've reached the end of the file, stop.
- if ( _at >= _fileEnd )
- {
- return false;
- }
- }
- }
- // the character is not a number, open brace, close brace, comma, semicolon,
- // string, uuid, identifier, or comma; it must be invalid. We could raise
- // an error condition, but we may as well just skip it.
- ++_at;
- // return whether the whole file has been lexed.
- return ( _at < _fileEnd );
- }
- //--------------------------------------------------------------------
- bool
- XLex::AddToken( ETokenType type, unsigned int length, unsigned int advance )
- {
- // construct a new token.
- XToken* result = AllocateToken();
- result->type = type;
- result->start = _at;
- result->end = _at + length;
- result->next = 0;
- // if there was a previous token, then append the new token
- // to the token list.
- if ( _last )
- {
- _last->next = result;
- }
- // set the new token to be the latest token.
- _last = result;
- // if this is the first token to be created, then track it
- // as the 'head' token.
- if ( !_head )
- {
- _head = result;
- }
- // advance the file pointer.
- _at += advance;
- // increment our token count.
- ++_tokenCount;
- // return true if the file pointer hasn't reached the end,
- // otherwise return false.
- return ( _at < _fileEnd );
- }
- //--------------------------------------------------------------------
- XToken*
- XLex::AllocateToken()
- {
- // sanity check the pool size.
- assert( TOKEN_POOL_SIZE > 0 );
- // validate that a token pool has been initially allocated.
- assert( _tokenPool );
- // if the token pool is full, allocate a new batch of tokens
- // to pull from.
- if ( _tokenPoolIdx >= TOKEN_POOL_SIZE )
- {
- _tokenPool = new XToken[ TOKEN_POOL_SIZE ];
- _tokenPools.push_back( _tokenPool );
- _tokenPoolIdx = 0;
- }
- // return an unused token.
- return &_tokenPool[ _tokenPoolIdx++ ];
- }
- //--------------------------------------------------------------------
- unsigned int
- XLex::Find( char c, const char* start ) const
- {
- // find the first occurrence of the character.
- for ( const char* iter = start; iter != _fileEnd; ++iter )
- {
- if ( *iter == c )
- {
- // return its position relative to '_at'.
- return iter - _at;
- }
- }
- // if we've reached the end of the file, return the
- // position of the last valid character.
- return ( _fileEnd - 1 ) - _at;
- }
- //--------------------------------------------------------------------
- bool
- XLex::IsFloat( const char* pos ) const
- {
- // if the current character is a negation sign, skip it.
- if ( *pos == '-' )
- {
- ++pos;
- }
- // search for a decimal point.
- for ( const char* iter = pos; iter < _fileEnd; ++iter )
- {
- char c = *iter;
- // if the current character is not a digit, test whether it's
- // a decimal. If it is, the number is a float, so return true.
- // Otherwise, the number is not a float, so return false.
- if ( !isdigit( c ) )
- {
- return ( c == '.' );
- }
- }
- // if we've reached the end of the file without finding a decimal
- // but without finding a non-digit, then return false, because this
- // number is an integer.
- return false;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement