"""
    Lexer for LESSCSS.

    http://www.dabeaz.com/ply/ply.html
    http://www.w3.org/TR/CSS21/grammar.html#scanner
    http://lesscss.org/#docs

    Copyright (c)
    See LICENSE for details.
    <jtm@robot.is>
"""
import re
import ply.lex as lex
from six import string_types

from lesscpy.lib import dom
from lesscpy.lib import css
from lesscpy.lib import reserved


class LessLexer:
    """PLY-based tokenizer for LESS/CSS source.

    Every ``t_*`` method is a PLY token rule. The string that forms the first
    statement of such a method is the rule's regular expression (PLY reads it
    from ``__doc__``), so it must stay the first statement and must not be
    replaced by a prose docstring. PLY tries function rules in the order in
    which they are defined, so the relative order of the methods below is
    significant and must be preserved.

    All regex fragments are raw strings so that no fragment relies on
    Python's handling of unrecognised backslash escapes; the patterns match
    exactly the same input as the previous raw/non-raw mix.
    """

    states = (
        ('parn', 'inclusive'),
        ('escapequotes', 'inclusive'),
        ('escapeapostrophe', 'inclusive'),
        ('istringquotes', 'inclusive'),
        ('istringapostrophe', 'inclusive'),
        ('iselector', 'inclusive'),
        ('mediaquery', 'inclusive'),
        ('import', 'inclusive'),
    )
    literals = '<>=%!/*-+&'
    tokens = [
        'css_ident',
        'css_dom',
        'css_class',
        'css_id',
        'css_property',
        'css_vendor_property',
        'css_comment',
        'css_string',
        'css_color',
        'css_filter',
        'css_number',
        'css_important',
        'css_vendor_hack',
        'css_uri',
        'css_ms_filter',
        'css_media_type',
        'css_media_feature',
        't_and',
        't_not',
        't_only',
        'less_variable',
        'less_comment',
        'less_open_format',
        'less_when',
        'less_and',
        'less_not',
        't_ws',
        't_popen',
        't_pclose',
        't_semicolon',
        't_tilde',
        't_colon',
        't_comma',
        't_eopen',
        't_eclose',
        't_isopen',
        't_isclose',
        't_bopen',
        't_bclose'
    ]
    tokens += list(set(reserved.tokens.values()))

    # Tokens with significant following whitespace
    significant_ws = {
        'css_class',
        'css_id',
        'css_dom',
        'css_property',
        'css_vendor_property',
        'css_ident',
        'css_number',
        'css_color',
        'css_media_type',
        'css_filter',
        'less_variable',
        't_and',
        't_not',
        't_only',
        '&',
    }
    significant_ws.update(reserved.tokens.values())

    def __init__(self):
        self.build(reflags=re.UNICODE | re.IGNORECASE)
        # Last token handed out by token(); drives whitespace stripping and
        # the ';' injection.
        self.last = None
        # Token held back while an injected ';' is returned first.
        self.next_ = None
        # True until the first non-whitespace token has been seen.
        self.pretok = True

    def t_css_filter(self, t):
        (r'\[[^\]]*\]'
         r'|(not|lang|nth-[a-z\-]+)\(.+\)'
         r'|and[ \t]\([^><\{]+\)')
        return t

    def t_css_ms_filter(self, t):
        r'(?:progid:|DX\.)[^;\(]*'
        return t

    def t_t_bopen(self, t):
        r'\{'
        return t

    def t_t_bclose(self, t):
        r'\}'
        return t

    def t_t_colon(self, t):
        r':'
        return t

    def t_t_comma(self, t):
        r','
        t.lexer.in_property_decl = False
        return t

    def t_css_number(self, t):
        r'-?(\d*\.\d+|\d+)(s|%|in|ex|[ecm]m|p[txc]|deg|g?rad|ms?|k?hz|dpi|dpcm|dppx)?'
        return t

    def t_css_ident(self, t):
        (r'([\-\.\#]?'
         r'([_a-z]'
         r'|[\200-\377]'
         r'|\\[0-9a-f]{1,6}'
         r'|\\[^\s\r\n0-9a-f])'
         r'([_a-z0-9\-]'
         r'|[\200-\377]'
         r'|\\[0-9a-f]{1,6}'
         r'|\\[^\s\r\n0-9a-f])*)'
         r'|\.')
        # Generic identifier rule; the token type is refined below from the
        # first character and from the known property / DOM element tables.
        v = t.value.strip()
        c = v[0]
        if c == '.':
            # In some cases, only the '.' can be marked as CSS class.
            #
            # Example: .@{name}
            #
            t.type = 'css_class'
            if t.lexer.lexstate != "iselector":
                # Selector-chaining case (a.b.c), we are already in state
                # 'iselector'
                t.lexer.push_state("iselector")
        elif c == '#':
            t.type = 'css_id'
            # '#' followed by 3 or 6 hex digits is a color, not an id.
            if len(v) in (4, 7):
                try:
                    int(v[1:], 16)
                    t.type = 'css_color'
                except ValueError:
                    pass
        elif v == 'when':
            t.type = 'less_when'
        elif v == 'and':
            t.type = 'less_and'
        elif v == 'not':
            t.type = 'less_not'
        elif v in css.propertys:
            t.type = 'css_property'
            t.lexer.in_property_decl = True
        elif (v in dom.elements or v.lower() in dom.elements) \
                and not t.lexer.in_property_decl:
            # DOM elements can't be part of property declarations, avoids
            # ambiguity between 'rect' DOM element and rect() CSS function.
            t.type = 'css_dom'
        elif c == '-':
            t.type = 'css_vendor_property'
            t.lexer.in_property_decl = True
        t.value = v
        return t

    def t_iselector_less_variable(self, t):
        r'@\{[^@\}]+\}'
        return t

    def t_iselector_t_eclose(self, t):
        r'"|\''
        # Can only happen if iselector state is on top of estring state.
        #
        # Example: @item: ~".col-xs-@{index}";
        #
        t.lexer.pop_state()
        return t

    def t_iselector_css_filter(self, t):
        (r'\[[^\]]*\]'
         r'|(not|lang|nth-[a-z\-]+)\(.+\)'
         r'|and[ \t]\([^><\{]+\)')
        # TODO/FIXME(saschpe): Only needs to be redefined in state 'iselector'
        # so that the following css_class doesn't catch everything.
        return t

    def t_iselector_css_class(self, t):
        r'[_a-z0-9\-]+'
        # The first part of CSS class was tokenized by t_css_ident() already.
        # Here we gather up the part following any LESS variable.
        #
        # Example: .span_@{num}_small
        #
        return t

    def t_iselector_t_ws(self, t):
        r'[ \t\f\v]+'
        #
        # Example: .span_@{num}
        #
        t.lexer.pop_state()
        t.value = ' '
        return t

    def t_iselector_t_bopen(self, t):
        r'\{'
        t.lexer.pop_state()
        return t

    def t_iselector_t_colon(self, t):
        r':'
        t.lexer.pop_state()
        return t

    def t_mediaquery_t_not(self, t):
        r'not'
        return t

    def t_mediaquery_t_only(self, t):
        r'only'
        return t

    def t_mediaquery_t_and(self, t):
        r'and'
        return t

    def t_mediaquery_t_popen(self, t):
        r'\('
        # Redefine global t_popen to avoid pushing state 'parn'
        return t

    @lex.TOKEN('|'.join(css.media_types))
    def t_mediaquery_css_media_type(self, t):
        return t

    @lex.TOKEN('|'.join(css.media_features))
    def t_mediaquery_css_media_feature(self, t):
        return t

    def t_mediaquery_t_bopen(self, t):
        r'\{'
        t.lexer.pop_state()
        return t

    def t_mediaquery_t_semicolon(self, t):
        r';'
        # This can happen only as part of a CSS import statement. The
        # "mediaquery" state is reused there. Ordinary media queries always
        # end at '{', i.e. when a block is opened.
        t.lexer.pop_state()  # state mediaquery
        # We have to pop the 'import' state here because we already ate the
        # t_semicolon and won't trigger t_import_t_semicolon.
        t.lexer.pop_state()  # state import
        return t

    @lex.TOKEN('|'.join(css.media_types))
    def t_import_css_media_type(self, t):
        # Example: @import url("bar.css") handheld and (max-width: 500px);
        # Alternatively, we could use a lookahead "if not ';'" after the URL
        # part of the @import statement...
        t.lexer.push_state("mediaquery")
        return t

    def t_import_t_semicolon(self, t):
        r';'
        t.lexer.pop_state()
        return t

    def t_less_variable(self, t):
        r'@@?[\w-]+|@\{[^@\}]+\}'
        # Reserved @-words are re-typed via reserved.tokens; @media and
        # @import additionally switch the lexer into a dedicated state.
        v = t.value.lower()
        if v in reserved.tokens:
            t.type = reserved.tokens[v]
            if t.type == "css_media":
                t.lexer.push_state("mediaquery")
            elif t.type == "css_import":
                t.lexer.push_state("import")
        return t

    def t_css_color(self, t):
        r'\#[0-9]([0-9a-f]{5}|[0-9a-f]{2})'
        return t

    def t_parn_css_uri(self, t):
        (r'data:[^\)]+'
         r'|(([a-z]+://)?'
         r'('
         r'(/?[\.a-z:]+[\w\.:]*[\/][\/]?)+'
         r'|([a-z][\w\.\-]+(\.[a-z0-9]+))'
         r'(\#[a-z]+)?)'
         r')+')
        # NOTE: the path separator class is '[\/]' (forward slash only). The
        # earlier non-raw spelling '[\\/]' evaluated to exactly this pattern,
        # so the behaviour is kept as it was.
        return t

    def t_parn_css_ident(self, t):
        (r'(([_a-z]'
         r'|[\200-\377]'
         r'|\\[0-9a-f]{1,6}'
         r'|\\[^\r\n\s0-9a-f])'
         r'([_a-z0-9\-]|[\200-\377]'
         r'|\\[0-9a-f]{1,6}'
         r'|\\[^\r\n\s0-9a-f])*)')
        return t

    def t_newline(self, t):
        r'[\n\r]+'
        # Newlines produce no token; they only advance the line counter.
        t.lexer.lineno += t.value.count('\n')

    def t_css_comment(self, t):
        r'(/\*(.|\n|\r)*?\*/)'
        # Block comments are discarded, but their newlines still count.
        t.lexer.lineno += t.value.count('\n')

    def t_less_comment(self, t):
        r'//.*'
        # Line comments are discarded.
        pass

    def t_css_important(self, t):
        r'!\s*important'
        # Normalise '! important' and friends to a single spelling.
        t.value = '!important'
        return t

    def t_t_ws(self, t):
        r'[ \t\f\v]+'
        # Collapse any run of horizontal whitespace to one space.
        t.value = ' '
        return t

    def t_t_popen(self, t):
        r'\('
        t.lexer.push_state('parn')
        return t

    def t_less_open_format(self, t):
        r'%\('
        t.lexer.push_state('parn')
        return t

    def t_parn_t_pclose(self, t):
        r'\)'
        t.lexer.pop_state()
        return t

    def t_t_pclose(self, t):
        r'\)'
        return t

    def t_t_semicolon(self, t):
        r';'
        t.lexer.in_property_decl = False
        return t

    def t_t_eopen(self, t):
        r'~"|~\''
        # The quote character after '~' selects the matching escape state.
        if t.value[1] == '"':
            t.lexer.push_state('escapequotes')
        elif t.value[1] == '\'':
            t.lexer.push_state('escapeapostrophe')
        return t

    def t_t_tilde(self, t):
        r'~'
        return t

    def t_escapequotes_less_variable(self, t):
        r'@\{[^@"\}]+\}'
        return t

    def t_escapeapostrophe_less_variable(self, t):
        r'@\{[^@\'\}]+\}'
        return t

    def t_escapequotes_t_eclose(self, t):
        r'"'
        t.lexer.pop_state()
        return t

    def t_escapeapostrophe_t_eclose(self, t):
        r'\''
        t.lexer.pop_state()
        return t

    def t_css_string(self, t):
        r'"[^"@]*"|\'[^\'@]*\''
        # Complete quoted string containing no '@' (no interpolation).
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_t_isopen(self, t):
        r'"|\''
        # Opening quote of a string that was not matched as a whole by
        # t_css_string; switch to the matching interpolated-string state.
        if t.value[0] == '"':
            t.lexer.push_state('istringquotes')
        elif t.value[0] == '\'':
            t.lexer.push_state('istringapostrophe')
        return t

    def t_istringquotes_less_variable(self, t):
        r'@\{[^@"\}]+\}'
        return t

    def t_istringapostrophe_less_variable(self, t):
        r'@\{[^@\'\}]+\}'
        return t

    def t_istringapostrophe_css_string(self, t):
        r'[^\'@]+'
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_istringquotes_css_string(self, t):
        r'[^"@]+'
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_istringquotes_t_isclose(self, t):
        r'"'
        t.lexer.pop_state()
        return t

    def t_istringapostrophe_t_isclose(self, t):
        r'\''
        t.lexer.pop_state()
        return t

    # Error handling rule
    def t_error(self, t):
        # Lexing is aborted on the first illegal character; there is no
        # attempt to skip it and resume.
        raise SyntaxError("Illegal character '%s' line %d" %
                          (t.value[0], t.lexer.lineno))

    # Build the lexer
    def build(self, **kwargs):
        """
        Create the underlying PLY lexer from this object's rules.
        Keyword arguments are passed through to `lex.lex`.
        """
        self.lexer = lex.lex(module=self, **kwargs)
        # State-tracking variable, see http://www.dabeaz.com/ply/ply.html#ply_nn18
        self.lexer.in_property_decl = False

    def file(self, filename):
        """
        Lex file. Returns the lexer object itself so calls can be chained.
        """
        with open(filename) as f:
            self.lexer.input(f.read())
        return self

    def input(self, file):
        """
        Load lexer with content from `file` which can be a path or a file
        like object.
        """
        if isinstance(file, string_types):
            with open(file) as f:
                self.lexer.input(f.read())
        else:
            self.lexer.input(file.read())

    def token(self):
        """
        Token function. Contains 2 hacks:
            1.  Injects ';' into blocks where the last property
                leaves out the ;
            2.  Strips out whitespace from nonsignificant locations
                to ease parsing.

        Returns the next token, or the falsy value produced by the
        underlying lexer once the input is exhausted.
        """
        # A '}' that was held back while its injected ';' was returned.
        if self.next_:
            t = self.next_
            self.next_ = None
            return t
        while True:
            t = self.lexer.token()
            if not t:
                return t
            # Hack 2: drop whitespace at the very start of the input and
            # after any token whose trailing whitespace carries no meaning.
            if t.type == 't_ws' and (
                    self.pretok or
                    (self.last and
                     self.last.type not in self.significant_ws)):
                continue
            self.pretok = False
            # Hack 1: a '}' that directly follows something other than
            # '{', '}' or ';' closes a block whose last declaration lacks
            # its ';'. Not applied while inside a ~"..." / ~'...' escape.
            if (t.type == 't_bclose' and self.last
                    and self.last.type not in ('t_bopen', 't_bclose',
                                               't_semicolon')
                    and not (hasattr(t, 'lexer')
                             and t.lexer.lexstate in ('escapequotes',
                                                      'escapeapostrophe'))):
                self.next_ = t
                tok = lex.LexToken()
                tok.type = 't_semicolon'
                tok.value = ';'
                tok.lineno = t.lineno
                tok.lexpos = t.lexpos
                self.last = tok
                self.lexer.in_property_decl = False
                return tok
            self.last = t
            break
        return t