Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
// Tokenization follows the tokenizer stage described in the W3C HTML Standard.
/**
 * Tokenizes angle-bracket tag markup into a flat, ordered list of tokens.
 *
 * The input is scanned byte by byte with a small state machine modelled on
 * the tokenizer of the HTML Standard. Every emitted token is an array with
 * the keys 'TagName', 'SelfClosingFlag', 'IsClosingTag', 'Data',
 * 'Attributes' (list of ['AttributeName' => string, 'Value' => string]) and
 * 'Style' (always an empty array here).
 *
 * Three kinds of token reach the result:
 *  - text runs:  TagName '__Text',  Data holds the text, SelfClosingFlag true
 *  - comments:   TagName 'Comment', Data holds the body, SelfClosingFlag true
 *  - tags whose lower-cased name is a key of the whitelist below; the
 *    TagName is replaced by the whitelist's canonical value.
 * Any other tag is silently dropped (its surrounding text is kept).
 *
 * Closing tags are emitted as tokens too ('IsClosingTag' => true). The list
 * is therefore flat: a tag B is nested in A when B's open token comes after
 * A's open token and before A's closing token.
 *
 * A tag or comment that is still unterminated at the end of the input is
 * discarded. Scanning is byte-wise; since every delimiter is ASCII,
 * multi-byte UTF-8 sequences pass through text, names and values unchanged.
 *
 * @param string $HTMLSource Markup to tokenize.
 *
 * @return array List of token arrays in source order.
 */
function parseBBCode($HTMLSource)
{
    // Whitelist: lower-cased tag name => canonical name stored in the token.
    // Extend this map to accept more tags.
    $validTags = [
        'spoiler' => 'spoiler',
    ];

    $source = (string) $HTMLSource;
    $length = strlen($source);

    $document = [];   // Emitted tokens, in source order.
    $state = '';      // Current tokenizer state; '' is the plain-text (data) state.
    $text = '';       // Text collected since the last emitted token.
    $token = null;    // Tag/comment token currently being built, if any.
    $attr = null;     // Index into $token['Attributes'] of the attribute being built.

    // Builds an empty token of the given tag name.
    $newToken = static function ($tagName) {
        return [
            'TagName' => $tagName,
            'SelfClosingFlag' => false,
            'IsClosingTag' => false,
            'Data' => '',
            'Attributes' => [],
            'Style' => [],
        ];
    };

    // Appends a token to the document if it is whitelisted, a comment or a text run.
    $emit = static function (array $candidate) use (&$document, $validTags) {
        $name = strtolower($candidate['TagName']);
        $isWhitelisted = isset($validTags[$name]);
        if ($isWhitelisted) {
            $candidate['TagName'] = $validTags[$name];
        }
        if ($isWhitelisted || $name === 'comment' || $name === '__text') {
            $document[] = $candidate;
        }
    };

    // Emits the collected text (if any) as a '__Text' token and clears the buffer.
    // Compared against '' explicitly so that the text "0" is not treated as empty.
    $flushText = static function () use (&$text, $newToken, $emit) {
        if ($text === '') {
            return;
        }
        $textToken = $newToken('__Text');
        $textToken['Data'] = $text;
        $textToken['SelfClosingFlag'] = true;
        $emit($textToken);
        $text = '';
    };

    // Emits the token under construction and returns to the data state.
    $emitActive = static function () use (&$token, &$attr, &$state, $emit) {
        if ($token !== null) {
            $emit($token);
        }
        $token = null;
        $attr = null;
        $state = '';
    };

    // Starts a new, empty attribute on the token under construction.
    $startAttribute = static function () use (&$token, &$attr) {
        $token['Attributes'][] = ['AttributeName' => '', 'Value' => ''];
        $attr = count($token['Attributes']) - 1;
    };

    $pos = 0; // substr/offset access is 0-based.
    while ($pos < $length) {
        $ch = $source[$pos];
        $isSpace = $ch === ' ' || $ch === "\t" || $ch === "\n" || $ch === "\r" || $ch === "\f";
        // Most transitions consume the current byte; a branch clears this flag
        // when the next state must look at the same byte again.
        $consume = true;

        switch ($state) {
            case '':
                if ($ch === '<') {
                    $flushText();
                    $state = 'TAG_OPEN';
                } else {
                    $text .= $ch;
                }
                break;

            case 'TAG_OPEN':
                if ($ch === '!') {
                    $state = 'MARKUP_DECLARATION_OPEN';
                } elseif ($ch === '/') {
                    $state = 'END_TAG_OPEN';
                } else {
                    $token = $newToken('');
                    $state = 'TAG_NAME';
                    $consume = false;
                }
                break;

            case 'END_TAG_OPEN':
                if ($ch === '>') {
                    // "</>" is a parse error: nothing is emitted.
                    $state = '';
                } else {
                    $token = $newToken('');
                    $token['IsClosingTag'] = true;
                    $state = 'TAG_NAME';
                    $consume = false;
                }
                break;

            case 'TAG_NAME':
                if ($isSpace) {
                    $state = 'BEFORE_ATTRIBUTE_NAME';
                } elseif ($ch === '/') {
                    $state = 'SELF_CLOSING_START_TAG';
                } elseif ($ch === '>') {
                    $emitActive();
                } else {
                    $token['TagName'] .= $ch;
                }
                break;

            case 'BEFORE_ATTRIBUTE_NAME':
                if ($isSpace || $ch === '"' || $ch === "'" || $ch === '<') {
                    // Whitespace and stray quote/"<" bytes are skipped.
                } elseif ($ch === '/') {
                    $state = 'SELF_CLOSING_START_TAG';
                } elseif ($ch === '>') {
                    $emitActive();
                } else {
                    $startAttribute();
                    $state = 'ATTRIBUTE_NAME';
                    $consume = false;
                }
                break;

            case 'ATTRIBUTE_NAME':
                if ($isSpace) {
                    $state = 'AFTER_ATTRIBUTE_NAME';
                } elseif ($ch === '/') {
                    $state = 'SELF_CLOSING_START_TAG';
                } elseif ($ch === '"' || $ch === "'") {
                    // Stray quotes inside a name are skipped.
                } elseif ($ch === '=') {
                    $state = 'BEFORE_ATTRIBUTE_VALUE';
                } elseif ($ch === '>') {
                    $emitActive();
                } else {
                    $token['Attributes'][$attr]['AttributeName'] .= $ch;
                }
                break;

            case 'AFTER_ATTRIBUTE_NAME':
                if ($isSpace) {
                    // Skip whitespace between a name and "=" or the next attribute.
                } elseif ($ch === '/') {
                    $state = 'SELF_CLOSING_START_TAG';
                } elseif ($ch === '=') {
                    $state = 'BEFORE_ATTRIBUTE_VALUE';
                } elseif ($ch === '>') {
                    $emitActive();
                } else {
                    // The previous attribute had no value; this byte starts the next one.
                    $state = 'BEFORE_ATTRIBUTE_NAME';
                    $consume = false;
                }
                break;

            case 'BEFORE_ATTRIBUTE_VALUE':
                if ($isSpace || $ch === '<' || $ch === '=') {
                    // Skipped.
                } elseif ($ch === '"') {
                    $state = 'ATTRIBUTE_VALUE_DOUBLE_QUOTE';
                } elseif ($ch === "'") {
                    $state = 'ATTRIBUTE_VALUE_SINGLE_QUOTE';
                } elseif ($ch === '>') {
                    $emitActive();
                } else {
                    $state = 'ATTRIBUTE_VALUE_UNQUOTED';
                    $consume = false;
                }
                break;

            case 'ATTRIBUTE_VALUE_DOUBLE_QUOTE':
                if ($ch === '"') {
                    $state = 'AFTER_ATTRIBUTE_VALUE_QUOTED';
                } else {
                    $token['Attributes'][$attr]['Value'] .= $ch;
                }
                break;

            case 'ATTRIBUTE_VALUE_SINGLE_QUOTE':
                if ($ch === "'") {
                    $state = 'AFTER_ATTRIBUTE_VALUE_QUOTED';
                } else {
                    $token['Attributes'][$attr]['Value'] .= $ch;
                }
                break;

            case 'ATTRIBUTE_VALUE_UNQUOTED':
                if ($isSpace) {
                    $state = 'BEFORE_ATTRIBUTE_NAME';
                } elseif ($ch === '>') {
                    $emitActive();
                } else {
                    $token['Attributes'][$attr]['Value'] .= $ch;
                }
                break;

            case 'AFTER_ATTRIBUTE_VALUE_QUOTED':
                if ($isSpace) {
                    $state = 'BEFORE_ATTRIBUTE_NAME';
                } elseif ($ch === '/') {
                    $state = 'SELF_CLOSING_START_TAG';
                } else {
                    $state = 'BEFORE_ATTRIBUTE_NAME';
                    $consume = false;
                }
                break;

            case 'SELF_CLOSING_START_TAG':
                if ($ch === '>') {
                    $token['SelfClosingFlag'] = true;
                    $emitActive();
                } else {
                    // A "/" not followed by ">" is ignored; re-read this byte as attribute input.
                    $state = 'BEFORE_ATTRIBUTE_NAME';
                    $consume = false;
                }
                break;

            case 'MARKUP_DECLARATION_OPEN':
                if (substr($source, $pos, 2) === '--') {
                    $token = $newToken('Comment');
                    $token['SelfClosingFlag'] = true; // Comments never have a closing tag.
                    $state = 'COMMENT_START';
                    $pos++; // Two bytes ("--") are consumed in total.
                } else {
                    $state = 'BOGUS_COMMENT';
                    $consume = false;
                }
                break;

            case 'BOGUS_COMMENT':
                // Everything up to and including the next ">" is discarded.
                if ($ch === '>') {
                    $state = '';
                }
                break;

            case 'COMMENT_START':
                if ($ch === '-') {
                    $state = 'COMMENT_START_DASH';
                } else {
                    $token['Data'] .= $ch;
                }
                break;

            case 'COMMENT_START_DASH':
                if ($ch === '-') {
                    $state = 'COMMENT_END';
                } elseif ($ch === '>' && $token['Data'] === '') {
                    // "<!--->" closes an empty comment right away.
                    $emitActive();
                } else {
                    // A lone dash is ordinary comment content.
                    $token['Data'] .= '-' . $ch;
                    $state = 'COMMENT_START';
                }
                break;

            case 'COMMENT_END':
                if ($ch === '>') {
                    $emitActive();
                } elseif ($ch === '-') {
                    // One more dash than needed: keep the oldest one as content.
                    $token['Data'] .= '-';
                } else {
                    // "--" not followed by ">" is ordinary comment content.
                    $token['Data'] .= '--';
                    $state = 'COMMENT_START';
                    $consume = false;
                }
                break;

            default:
                throw new LogicException("Unhandled tokenizer state: {$state}");
        }

        if ($consume) {
            $pos++;
        }
    }

    // Text after the last tag would otherwise be lost.
    $flushText();

    return $document;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement