Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Parses a block of CSS code and returns it as a serialised array. Supports even unorthodox
 * use such as nested rulesets and "junk" declarations. Comments are left intact.
 *
 * The returned array (and every block inside it) may contain:
 *  - blocks: arrays carrying a `type` of 32 and a `name` (the trimmed selector / at-rule prelude),
 *    whose elements are the block's own entries in source order;
 *  - declarations: `{name, value}` objects, both trimmed of surrounding whitespace;
 *  - junk statements: strings (terminated junk keeps its trailing ";");
 *  - comments: `{type: 1, data}` objects, plus a `textBefore` string when the comment was
 *    injected after some not-yet-stored text;
 *  - at top level only: a `{data}` object holding trailing text that never formed a statement.
 *
 * Anything left open at the end of the input (comment, bracket, quote or block) is closed implicitly.
 *
 * @param {String} string - CSS source text.
 * @return {Array} Serialised representation of the source.
 */
function tokenise(string){
	/** Token type constants. */
	var T_COMMENT         = 1,
		T_BRACKETS_SQUARE = 2,
		T_BRACKETS_ROUND  = 4,
		T_QUOTES_DOUBLE   = 8,
		T_QUOTES_SINGLE   = 16,
		T_BLOCK           = 32,

		/** Bitmask matching any bracket or quote-like token type. */
		T_DELIMITER       = 30,

		/** Opening and closing characters of each delimiter type, keyed by type constant. */
		OPENERS = {2: "[", 4: "(", 8: '"', 16: "'"},
		CLOSERS = {2: "]", 4: ")", 8: '"', 16: "'"},

		/** None of these is used with .test()/.exec(), so sharing them between iterations is stateless. */
		rTrim        = /^\s+|\s+$/g,
		rLeading     = /^\s+/,
		rDeclaration = /\s*([^:]+):\s*([^\x00]+)\s*/m,

		/** Returned CSS array. */
		CSS    = [],

		/** Quick-and-dirty hack for browsers that don't support direct character access in strings (IE7). */
		chars  = (undefined === "a"[0] ? string.split("") : string),
		index  = 0,
		chr,

		/** Token currently being collected; null while cruising at top level. */
		token  = null,

		/** Last opened block (or CSS itself): receives comments and top-level statements. */
		dumpTo = CSS;


	/** True if text ends in an odd number of backslashes, i.e. the next character is escaped. */
	function endsWithEscape(text){
		var count = 0, i = text.length;
		while(i-- > 0 && "\\" === text.charAt(i)) ++count;
		return 1 === count % 2;
	}

	/** Starts collecting a bracket/quote token nested in the current token. */
	function openDelimiter(type, keepTextBefore){
		var opened = {type: type, parent: token, data: ""};
		if(keepTextBefore) opened.textBefore = token.data + OPENERS[type];
		token = opened;
	}

	/** Folds the finished bracket/quote token back into its parent's text. */
	function closeDelimiter(){
		var closed = token;
		token = closed.parent;
		token.data += OPENERS[closed.type] + closed.data + CLOSERS[closed.type];
	}

	/** Starts a comment inside the current token, remembering any text collected so far. */
	function openComment(){
		var before = ((token.textBefore || "") + token.data).replace(rLeading, "");
		token = {type: T_COMMENT, data: "", parent: token};
		if(before) token.textBefore = before;
	}

	/** Stores the finished comment and returns focus to whatever it interrupted. */
	function closeComment(){
		var closed = token;
		/** Comments go to dumpTo because .parent may be a string-only token such as a bracket. */
		dumpTo.push(closed);
		token = closed.parent || null;
		delete closed.parent;
	}

	/** Opens a block named after the text collected so far and makes it the current token. */
	function openBlock(){
		var block = [];
		block.type = T_BLOCK;
		block.data = "";
		block.name = "";
		if(token){
			block.name = token.data.replace(rTrim, "");
			token.data = "";
			/** Only blocks can own other blocks; a top-level selector token is simply dropped. */
			if(token instanceof Array){
				token.push(block);
				block.parent = token;
			}
		}
		dumpTo = token = block;
	}

	/** Stores text as a declaration, or as junk when it holds no colon. */
	function storeStatement(text, terminated){
		/** A top-level token is a plain object with nowhere to store entries, so use dumpTo instead. */
		var target = (token instanceof Array ? token : dumpTo),
			match  = text.match(rDeclaration);
		if(match)
			target.push({
				name:  match[1].replace(rTrim, ""),
				value: match[2].replace(rTrim, "")
			});
		/** Semicolon-terminated junk is always kept, even when empty. */
		else if(terminated) target.push(text.replace(rLeading, "") + ";");
		/** Unterminated junk is only kept if it isn't whitespace. */
		else if(text = text.replace(rTrim, "")) target.push(text);
	}

	/** Closes the current block, flushing a last declaration that omitted its semicolon. */
	function closeBlock(){
		var closed = token;
		storeStatement(closed.data, false);
		if(closed.parent) dumpTo = token = closed.parent;
		else{
			CSS.push(closed);
			token  = null;
			dumpTo = CSS;
		}
		/** Drop scratch properties we no longer need. */
		delete closed.parent;
		delete closed.data;
	}

	/** End of input: implicitly closes whatever is still open. */
	function finish(){
		var closed;
		if(!token) return;
		if(T_COMMENT === token.type) closeComment();

		/** Unclosed brackets/quotes: fold their text (minus the missing closer) into the parent. */
		while(token && (T_DELIMITER & token.type)){
			closed = token;
			token  = closed.parent;
			token.data += OPENERS[closed.type] + closed.data;
		}
		if(!token) return;

		if(T_BLOCK === token.type){
			storeStatement(token.data, false);
			/** Nested blocks were attached to their parents when opened, so just walk up to the root. */
			while(token.parent){
				closed = token;
				token  = closed.parent;
				delete closed.parent;
				delete closed.data;
			}
			delete token.parent;
			delete token.data;
			CSS.push(token);
		}
		/** Trailing top-level text: keep it unless it's nothing but whitespace. */
		else if(token.data.replace(rTrim, "")) CSS.push(token);
		token = null;
	}


	for(;;){
		chr = chars[index];
		++index;

		/** End of input. */
		if(undefined === chr){
			finish();
			break;
		}

		/** Top level, nothing being collected. */
		if(!token){
			if("\t" === chr || "\n" === chr || "\r" === chr || " " === chr) continue;
			if("/" === chr && "*" === chars[index]){
				++index;
				token = {type: T_COMMENT, data: ""};
			}
			else if("{" === chr) openBlock();
			else token = {data: chr};
		}

		else if(T_COMMENT === token.type){
			if("*" === chr && "/" === chars[index]){
				++index;
				closeComment();
			}
			else token.data += chr;
		}

		/** "Double" or 'single' quotes: everything is literal up to the first unescaped closer. */
		else if(T_QUOTES_DOUBLE === token.type || T_QUOTES_SINGLE === token.type){
			if(CLOSERS[token.type] === chr && !endsWithEscape(token.data)) closeDelimiter();
			else token.data += chr;
		}

		/** [Square] or (round) brackets: may nest each other, quotes and comments. */
		else if(T_BRACKETS_SQUARE === token.type || T_BRACKETS_ROUND === token.type){
			if(CLOSERS[token.type] === chr) closeDelimiter();
			else if('"' === chr) openDelimiter(T_QUOTES_DOUBLE, false);
			else if("'" === chr) openDelimiter(T_QUOTES_SINGLE, false);
			else if("[" === chr) openDelimiter(T_BRACKETS_SQUARE, false);
			else if("(" === chr) openDelimiter(T_BRACKETS_ROUND, false);
			else if("/" === chr && "*" === chars[index]){
				++index;
				openComment();
			}
			else token.data += chr;
		}

		/** Inside a block, or collecting top-level text (selector, at-rule, junk). */
		else{
			if("[" === chr)      openDelimiter(T_BRACKETS_SQUARE, true);
			else if("(" === chr) openDelimiter(T_BRACKETS_ROUND, true);
			else if('"' === chr) openDelimiter(T_QUOTES_DOUBLE, true);
			else if("'" === chr) openDelimiter(T_QUOTES_SINGLE, true);
			else if("{" === chr) openBlock();
			else if(";" === chr){
				storeStatement(token.data, true);
				token.data = "";
			}
			/** A "}" with no block open is just another character. */
			else if("}" === chr && T_BLOCK === token.type) closeBlock();
			else if("/" === chr && "*" === chars[index]){
				++index;
				openComment();
			}
			else token.data += chr;
		}
	}
	return CSS;
}
/**
 * Readable (unminified) CSS tokeniser.
 *
 * Parses a block of CSS code and returns it as a serialised array. Supports even unorthodox
 * use such as nested rulesets and "junk" declarations. Comments are left intact.
 *
 * The returned array (and every block inside it) may contain:
 *  - blocks: arrays carrying a `type` of 32 and a `name` (the trimmed selector / at-rule prelude),
 *    whose elements are the block's own entries in source order;
 *  - declarations: `{name, value}` objects, both trimmed of surrounding whitespace;
 *  - junk statements: strings (terminated junk keeps its trailing ";");
 *  - comments: `{type: 1, data}` objects, plus a `textBefore` string when the comment was
 *    injected after some not-yet-stored text, so scripts can use injected commentary as
 *    "custom properties" or metadata;
 *  - at top level only: a `{data}` object holding trailing text that never formed a statement.
 *
 * Anything left open at the end of the input (comment, bracket, quote or block) is closed implicitly.
 *
 * @param {String} string - CSS source text.
 * @return {Array} Serialised representation of the source.
 */
function tokenise(string){
	/** Token type constants. */
	var T_COMMENT         = 1,
		T_BRACKETS_SQUARE = 2,
		T_BRACKETS_ROUND  = 4,
		T_QUOTES_DOUBLE   = 8,
		T_QUOTES_SINGLE   = 16,
		T_BLOCK           = 32,

		/** Bitmask matching any bracket or quote-like token type. */
		T_DELIMITER       = 30,

		/** Opening and closing characters of each delimiter type, keyed by type constant. */
		OPENERS = {2: "[", 4: "(", 8: '"', 16: "'"},
		CLOSERS = {2: "]", 4: ")", 8: '"', 16: "'"},

		/** None of these is used with .test()/.exec(), so sharing them between iterations is stateless. */
		rTrim        = /^\s+|\s+$/g,
		rLeading     = /^\s+/,
		rDeclaration = /\s*([^:]+):\s*([^\x00]+)\s*/m,

		/** Returned CSS array. */
		CSS    = [],

		/** Quick-and-dirty hack for browsers that don't support direct character access in strings (IE7). */
		chars  = (undefined === "a"[0] ? string.split("") : string),
		index  = 0,
		chr,

		/** Token currently being collected; null while cruising at top level. */
		token  = null,

		/**
		 * Pointer to the last block that was opened (or CSS itself at top level).
		 * Receives injected comments and top-level statements.
		 */
		dumpTo = CSS;


	/** True if text ends in an odd number of backslashes, i.e. the next character is escaped. */
	function endsWithEscape(text){
		var count = 0, i = text.length;
		while(i-- > 0 && "\\" === text.charAt(i)) ++count;
		return 1 === count % 2;
	}

	/** Starts collecting a bracket/quote token nested in the current token. */
	function openDelimiter(type, keepTextBefore){
		var opened = {type: type, parent: token, data: ""};
		if(keepTextBefore) opened.textBefore = token.data + OPENERS[type];
		token = opened;
	}

	/** Folds the finished bracket/quote token back into its parent's text. */
	function closeDelimiter(){
		var closed = token;
		token = closed.parent;
		token.data += OPENERS[closed.type] + closed.data + CLOSERS[closed.type];
	}

	/** Starts a comment inside the current token, remembering any text collected so far. */
	function openComment(){
		var before = ((token.textBefore || "") + token.data).replace(rLeading, "");
		token = {type: T_COMMENT, data: "", parent: token};
		if(before) token.textBefore = before;
	}

	/** Stores the finished comment and returns focus to whatever it interrupted. */
	function closeComment(){
		var closed = token;
		/** Comments go to dumpTo because .parent may be a string-only token such as a bracket. */
		dumpTo.push(closed);
		token = closed.parent || null;
		delete closed.parent;
	}

	/** Opens a block named after the text collected so far and makes it the current token. */
	function openBlock(){
		var block = [];
		block.type = T_BLOCK;
		block.data = "";
		block.name = "";
		if(token){
			block.name = token.data.replace(rTrim, "");
			token.data = "";
			/** Only blocks can own other blocks; a top-level selector token is simply dropped. */
			if(token instanceof Array){
				token.push(block);
				block.parent = token;
			}
		}
		dumpTo = token = block;
	}

	/** Stores text as a declaration, or as junk when it holds no colon. */
	function storeStatement(text, terminated){
		/** A top-level token is a plain object with nowhere to store entries, so use dumpTo instead. */
		var target = (token instanceof Array ? token : dumpTo),
			match  = text.match(rDeclaration);
		if(match)
			target.push({
				name:  match[1].replace(rTrim, ""),
				value: match[2].replace(rTrim, "")
			});
		/** Semicolon-terminated junk is always kept, even when empty. */
		else if(terminated) target.push(text.replace(rLeading, "") + ";");
		/** Unterminated junk is only kept if it isn't whitespace. */
		else if(text = text.replace(rTrim, "")) target.push(text);
	}

	/** Closes the current block, flushing a last declaration that omitted its semicolon. */
	function closeBlock(){
		var closed = token;
		storeStatement(closed.data, false);
		/** Nested block: hand focus back to the parent. */
		if(closed.parent) dumpTo = token = closed.parent;
		/** Top-level block: push it onto the end of the returned array. */
		else{
			CSS.push(closed);
			token  = null;
			dumpTo = CSS;
		}
		/** Drop scratch properties we no longer need. */
		delete closed.parent;
		delete closed.data;
	}

	/** End of input: implicitly closes whatever the author forgot to close. */
	function finish(){
		var closed;
		if(!token) return;
		if(T_COMMENT === token.type) closeComment();

		/** Unclosed brackets/quotes: fold their text (minus the missing closer) into the parent. */
		while(token && (T_DELIMITER & token.type)){
			closed = token;
			token  = closed.parent;
			token.data += OPENERS[closed.type] + closed.data;
		}
		/** Nothing left, e.g. an unclosed comment at top level. */
		if(!token) return;

		if(T_BLOCK === token.type){
			storeStatement(token.data, false);
			/** Nested blocks were attached to their parents when opened, so just walk up to the root. */
			while(token.parent){
				closed = token;
				token  = closed.parent;
				delete closed.parent;
				delete closed.data;
			}
			delete token.parent;
			delete token.data;
			CSS.push(token);
		}
		/** Trailing top-level text: keep it unless it's nothing but whitespace. */
		else if(token.data.replace(rTrim, "")) CSS.push(token);
		token = null;
	}


	for(;;){
		chr = chars[index];
		++index;

		/** End of input? Wrap up and bail. */
		if(undefined === chr){
			finish();
			break;
		}

		/** No token currently being carried, which means we're cruising at top level. */
		if(!token){
			/** Whitespace? Ignore. */
			if("\t" === chr || "\n" === chr || "\r" === chr || " " === chr) continue;
			if("/" === chr && "*" === chars[index]){
				++index;
				token = {type: T_COMMENT, data: ""};
			}
			/** Selector-less block: someone put { ... } in the middle of nowhere. */
			else if("{" === chr) openBlock();
			else token = {data: chr};
		}

		else if(T_COMMENT === token.type){
			if("*" === chr && "/" === chars[index]){
				++index;
				closeComment();
			}
			else token.data += chr;
		}

		/** "Double" or 'single' quotes: everything is literal up to the first unescaped closer. */
		else if(T_QUOTES_DOUBLE === token.type || T_QUOTES_SINGLE === token.type){
			if(CLOSERS[token.type] === chr && !endsWithEscape(token.data)) closeDelimiter();
			else token.data += chr;
		}

		/** [Square] or (round) brackets: may nest each other, quotes and injected comments. */
		else if(T_BRACKETS_SQUARE === token.type || T_BRACKETS_ROUND === token.type){
			if(CLOSERS[token.type] === chr) closeDelimiter();
			else if('"' === chr) openDelimiter(T_QUOTES_DOUBLE, false);
			else if("'" === chr) openDelimiter(T_QUOTES_SINGLE, false);
			else if("[" === chr) openDelimiter(T_BRACKETS_SQUARE, false);
			else if("(" === chr) openDelimiter(T_BRACKETS_ROUND, false);
			else if("/" === chr && "*" === chars[index]){
				++index;
				openComment();
			}
			else token.data += chr;
		}

		/** Inside a block, or collecting top-level text (selector, at-rule, junk). */
		else{
			if("[" === chr)      openDelimiter(T_BRACKETS_SQUARE, true);
			else if("(" === chr) openDelimiter(T_BRACKETS_ROUND, true);
			else if('"' === chr) openDelimiter(T_QUOTES_DOUBLE, true);
			else if("'" === chr) openDelimiter(T_QUOTES_SINGLE, true);
			else if("{" === chr) openBlock();
			/** Semicolon: end of a property declaration (or of a junk statement). */
			else if(";" === chr){
				storeStatement(token.data, true);
				token.data = "";
			}
			/** End of block. A "}" with no block open is just another character. */
			else if("}" === chr && T_BLOCK === token.type) closeBlock();
			else if("/" === chr && "*" === chars[index]){
				++index;
				openComment();
			}
			else token.data += chr;
		}
	}
	return CSS;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement