510 lines
13 KiB
Python
510 lines
13 KiB
Python
"""
|
|
Lexer for LESSCSS.
|
|
|
|
http://www.dabeaz.com/ply/ply.html
|
|
http://www.w3.org/TR/CSS21/grammar.html#scanner
|
|
http://lesscss.org/#docs
|
|
|
|
Copyright (c)
|
|
See LICENSE for details.
|
|
<jtm@robot.is>
|
|
"""
|
|
import re
|
|
import ply.lex as lex
|
|
from six import string_types
|
|
|
|
from lesscpy.lib import dom
|
|
from lesscpy.lib import css
|
|
from lesscpy.lib import reserved
|
|
|
|
|
|
class LessLexer:
|
|
states = (
|
|
('parn', 'inclusive'),
|
|
('escapequotes', 'inclusive'),
|
|
('escapeapostrophe', 'inclusive'),
|
|
('istringquotes', 'inclusive'),
|
|
('istringapostrophe', 'inclusive'),
|
|
('iselector', 'inclusive'),
|
|
('mediaquery', 'inclusive'),
|
|
('import', 'inclusive'),
|
|
)
|
|
literals = '<>=%!/*-+&'
|
|
tokens = [
|
|
'css_ident',
|
|
'css_dom',
|
|
'css_class',
|
|
'css_id',
|
|
'css_property',
|
|
'css_vendor_property',
|
|
'css_comment',
|
|
'css_string',
|
|
'css_color',
|
|
'css_filter',
|
|
'css_number',
|
|
'css_important',
|
|
'css_vendor_hack',
|
|
'css_uri',
|
|
'css_ms_filter',
|
|
|
|
'css_media_type',
|
|
'css_media_feature',
|
|
|
|
't_and',
|
|
't_not',
|
|
't_only',
|
|
|
|
'less_variable',
|
|
'less_comment',
|
|
'less_open_format',
|
|
'less_when',
|
|
'less_and',
|
|
'less_not',
|
|
|
|
't_ws',
|
|
't_popen',
|
|
't_pclose',
|
|
't_semicolon',
|
|
't_tilde',
|
|
't_colon',
|
|
't_comma',
|
|
|
|
't_eopen',
|
|
't_eclose',
|
|
|
|
't_isopen',
|
|
't_isclose',
|
|
|
|
't_bopen',
|
|
't_bclose'
|
|
]
|
|
tokens += list(set(reserved.tokens.values()))
|
|
# Tokens with significant following whitespace
|
|
significant_ws = set([
|
|
'css_class',
|
|
'css_id',
|
|
'css_dom',
|
|
'css_property',
|
|
'css_vendor_property',
|
|
'css_ident',
|
|
'css_number',
|
|
'css_color',
|
|
'css_media_type',
|
|
'css_filter',
|
|
'less_variable',
|
|
't_and',
|
|
't_not',
|
|
't_only',
|
|
'&',
|
|
])
|
|
significant_ws.update(reserved.tokens.values())
|
|
|
|
def __init__(self):
|
|
self.build(reflags=re.UNICODE | re.IGNORECASE)
|
|
self.last = None
|
|
self.next_ = None
|
|
self.pretok = True
|
|
|
|
def t_css_filter(self, t):
|
|
(r'\[[^\]]*\]'
|
|
'|(not|lang|nth-[a-z\-]+)\(.+\)'
|
|
'|and[ \t]\([^><\{]+\)')
|
|
return t
|
|
|
|
def t_css_ms_filter(self, t):
|
|
r'(?:progid:|DX\.)[^;\(]*'
|
|
return t
|
|
|
|
def t_t_bopen(self, t):
|
|
r'\{'
|
|
return t
|
|
|
|
def t_t_bclose(self, t):
|
|
r'\}'
|
|
return t
|
|
|
|
def t_t_colon(self, t):
|
|
r':'
|
|
return t
|
|
|
|
def t_t_comma(self, t):
|
|
r','
|
|
t.lexer.in_property_decl = False
|
|
return t
|
|
|
|
def t_css_number(self, t):
|
|
r'-?(\d*\.\d+|\d+)(s|%|in|ex|[ecm]m|p[txc]|deg|g?rad|ms?|k?hz|dpi|dpcm|dppx)?'
|
|
return t
|
|
|
|
def t_css_ident(self, t):
|
|
(r'([\-\.\#]?'
|
|
'([_a-z]'
|
|
'|[\200-\377]'
|
|
'|\\\[0-9a-f]{1,6}'
|
|
'|\\\[^\s\r\n0-9a-f])'
|
|
'([_a-z0-9\-]'
|
|
'|[\200-\377]'
|
|
'|\\\[0-9a-f]{1,6}'
|
|
'|\\\[^\s\r\n0-9a-f])*)'
|
|
'|\.')
|
|
v = t.value.strip()
|
|
c = v[0]
|
|
if c == '.':
|
|
# In some cases, only the '.' can be marked as CSS class.
|
|
#
|
|
# Example: .@{name}
|
|
#
|
|
t.type = 'css_class'
|
|
if t.lexer.lexstate != "iselector":
|
|
# Selector-chaining case (a.b.c), we are already in state 'iselector'
|
|
t.lexer.push_state("iselector")
|
|
elif c == '#':
|
|
t.type = 'css_id'
|
|
if len(v) in [4, 7]:
|
|
try:
|
|
int(v[1:], 16)
|
|
t.type = 'css_color'
|
|
except ValueError:
|
|
pass
|
|
elif v == 'when':
|
|
t.type = 'less_when'
|
|
elif v == 'and':
|
|
t.type = 'less_and'
|
|
elif v == 'not':
|
|
t.type = 'less_not'
|
|
elif v in css.propertys:
|
|
t.type = 'css_property'
|
|
t.lexer.in_property_decl = True
|
|
elif (v in dom.elements or v.lower() in dom.elements) and not t.lexer.in_property_decl:
|
|
# DOM elements can't be part of property declarations, avoids ambiguity between 'rect' DOM
|
|
# element and rect() CSS function.
|
|
t.type = 'css_dom'
|
|
elif c == '-':
|
|
t.type = 'css_vendor_property'
|
|
t.lexer.in_property_decl = True
|
|
t.value = v
|
|
return t
|
|
|
|
def t_iselector_less_variable(self, t):
|
|
r'@\{[^@\}]+\}'
|
|
return t
|
|
|
|
def t_iselector_t_eclose(self, t):
|
|
r'"|\''
|
|
# Can only happen if iselector state is on top of estring state.
|
|
#
|
|
# Example: @item: ~".col-xs-@{index}";
|
|
#
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_iselector_css_filter(self, t):
|
|
(r'\[[^\]]*\]'
|
|
'|(not|lang|nth-[a-z\-]+)\(.+\)'
|
|
'|and[ \t]\([^><\{]+\)')
|
|
# TODO/FIXME(saschpe): Only needs to be redifined in state 'iselector' so that
|
|
# the following css_class doesn't catch everything.
|
|
return t
|
|
|
|
def t_iselector_css_class(self, t):
|
|
r'[_a-z0-9\-]+'
|
|
# The first part of CSS class was tokenized by t_css_ident() already.
|
|
# Here we gather up the any LESS variable.
|
|
#
|
|
# Example: .span_@{num}_small
|
|
#
|
|
return t
|
|
|
|
def t_iselector_t_ws(self, t):
|
|
r'[ \t\f\v]+'
|
|
#
|
|
# Example: .span_@{num}
|
|
#
|
|
t.lexer.pop_state()
|
|
t.value = ' '
|
|
return t
|
|
|
|
def t_iselector_t_bopen(self, t):
|
|
r'\{'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_iselector_t_colon(self, t):
|
|
r':'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_mediaquery_t_not(self, t):
|
|
r'not'
|
|
return t
|
|
|
|
def t_mediaquery_t_only(self, t):
|
|
r'only'
|
|
return t
|
|
|
|
def t_mediaquery_t_and(self, t):
|
|
r'and'
|
|
return t
|
|
|
|
def t_mediaquery_t_popen(self, t):
|
|
r'\('
|
|
# Redefine global t_popen to avoid pushing state 'parn'
|
|
return t
|
|
|
|
@lex.TOKEN('|'.join(css.media_types))
|
|
def t_mediaquery_css_media_type(self, t):
|
|
return t
|
|
|
|
@lex.TOKEN('|'.join(css.media_features))
|
|
def t_mediaquery_css_media_feature(self, t):
|
|
return t
|
|
|
|
def t_mediaquery_t_bopen(self, t):
|
|
r'\{'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_mediaquery_t_semicolon(self, t):
|
|
r';'
|
|
# This can happen only as part of a CSS import statement. The
|
|
# "mediaquery" state is reused there. Ordinary media queries always
|
|
# end at '{', i.e. when a block is opened.
|
|
t.lexer.pop_state() # state mediaquery
|
|
# We have to pop the 'import' state here because we already ate the
|
|
# t_semicolon and won't trigger t_import_t_semicolon.
|
|
t.lexer.pop_state() # state import
|
|
return t
|
|
|
|
@lex.TOKEN('|'.join(css.media_types))
|
|
def t_import_css_media_type(self, t):
|
|
# Example: @import url("bar.css") handheld and (max-width: 500px);
|
|
# Alternatively, we could use a lookahead "if not ';'" after the URL
|
|
# part of the @import statement...
|
|
t.lexer.push_state("mediaquery")
|
|
return t
|
|
|
|
def t_import_t_semicolon(self, t):
|
|
r';'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_less_variable(self, t):
|
|
r'@@?[\w-]+|@\{[^@\}]+\}'
|
|
v = t.value.lower()
|
|
if v in reserved.tokens:
|
|
t.type = reserved.tokens[v]
|
|
if t.type == "css_media":
|
|
t.lexer.push_state("mediaquery")
|
|
elif t.type == "css_import":
|
|
t.lexer.push_state("import")
|
|
return t
|
|
|
|
def t_css_color(self, t):
|
|
r'\#[0-9]([0-9a-f]{5}|[0-9a-f]{2})'
|
|
return t
|
|
|
|
def t_parn_css_uri(self, t):
|
|
(r'data:[^\)]+'
|
|
'|(([a-z]+://)?'
|
|
'('
|
|
'(/?[\.a-z:]+[\w\.:]*[\\/][\\/]?)+'
|
|
'|([a-z][\w\.\-]+(\.[a-z0-9]+))'
|
|
'(\#[a-z]+)?)'
|
|
')+')
|
|
return t
|
|
|
|
def t_parn_css_ident(self, t):
|
|
(r'(([_a-z]'
|
|
'|[\200-\377]'
|
|
'|\\\[0-9a-f]{1,6}'
|
|
'|\\\[^\r\n\s0-9a-f])'
|
|
'([_a-z0-9\-]|[\200-\377]'
|
|
'|\\\[0-9a-f]{1,6}'
|
|
'|\\\[^\r\n\s0-9a-f])*)')
|
|
return t
|
|
|
|
def t_newline(self, t):
|
|
r'[\n\r]+'
|
|
t.lexer.lineno += t.value.count('\n')
|
|
|
|
def t_css_comment(self, t):
|
|
r'(/\*(.|\n|\r)*?\*/)'
|
|
t.lexer.lineno += t.value.count('\n')
|
|
pass
|
|
|
|
def t_less_comment(self, t):
|
|
r'//.*'
|
|
pass
|
|
|
|
def t_css_important(self, t):
|
|
r'!\s*important'
|
|
t.value = '!important'
|
|
return t
|
|
|
|
def t_t_ws(self, t):
|
|
r'[ \t\f\v]+'
|
|
t.value = ' '
|
|
return t
|
|
|
|
def t_t_popen(self, t):
|
|
r'\('
|
|
t.lexer.push_state('parn')
|
|
return t
|
|
|
|
def t_less_open_format(self, t):
|
|
r'%\('
|
|
t.lexer.push_state('parn')
|
|
return t
|
|
|
|
def t_parn_t_pclose(self, t):
|
|
r'\)'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_t_pclose(self, t):
|
|
r'\)'
|
|
return t
|
|
|
|
def t_t_semicolon(self, t):
|
|
r';'
|
|
t.lexer.in_property_decl = False
|
|
return t
|
|
|
|
def t_t_eopen(self, t):
|
|
r'~"|~\''
|
|
if t.value[1] == '"':
|
|
t.lexer.push_state('escapequotes')
|
|
elif t.value[1] == '\'':
|
|
t.lexer.push_state('escapeapostrophe')
|
|
return t
|
|
|
|
def t_t_tilde(self, t):
|
|
r'~'
|
|
return t
|
|
|
|
def t_escapequotes_less_variable(self, t):
|
|
r'@\{[^@"\}]+\}'
|
|
return t
|
|
|
|
def t_escapeapostrophe_less_variable(self, t):
|
|
r'@\{[^@\'\}]+\}'
|
|
return t
|
|
|
|
def t_escapequotes_t_eclose(self, t):
|
|
r'"'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_escapeapostrophe_t_eclose(self, t):
|
|
r'\''
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_css_string(self, t):
|
|
r'"[^"@]*"|\'[^\'@]*\''
|
|
t.lexer.lineno += t.value.count('\n')
|
|
return t
|
|
|
|
def t_t_isopen(self, t):
|
|
r'"|\''
|
|
if t.value[0] == '"':
|
|
t.lexer.push_state('istringquotes')
|
|
elif t.value[0] == '\'':
|
|
t.lexer.push_state('istringapostrophe')
|
|
return t
|
|
|
|
def t_istringquotes_less_variable(self, t):
|
|
r'@\{[^@"\}]+\}'
|
|
return t
|
|
|
|
def t_istringapostrophe_less_variable(self, t):
|
|
r'@\{[^@\'\}]+\}'
|
|
return t
|
|
|
|
def t_istringapostrophe_css_string(self, t):
|
|
r'[^\'@]+'
|
|
t.lexer.lineno += t.value.count('\n')
|
|
return t
|
|
|
|
def t_istringquotes_css_string(self, t):
|
|
r'[^"@]+'
|
|
t.lexer.lineno += t.value.count('\n')
|
|
return t
|
|
|
|
def t_istringquotes_t_isclose(self, t):
|
|
r'"'
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
def t_istringapostrophe_t_isclose(self, t):
|
|
r'\''
|
|
t.lexer.pop_state()
|
|
return t
|
|
|
|
# Error handling rule
|
|
def t_error(self, t):
|
|
raise SyntaxError("Illegal character '%s' line %d" %
|
|
(t.value[0], t.lexer.lineno))
|
|
t.lexer.skip(1)
|
|
|
|
# Build the lexer
|
|
def build(self, **kwargs):
|
|
self.lexer = lex.lex(module=self, **kwargs)
|
|
# State-tracking variable, see http://www.dabeaz.com/ply/ply.html#ply_nn18
|
|
self.lexer.in_property_decl = False
|
|
|
|
def file(self, filename):
|
|
"""
|
|
Lex file.
|
|
"""
|
|
with open(filename) as f:
|
|
self.lexer.input(f.read())
|
|
return self
|
|
|
|
def input(self, file):
|
|
"""
|
|
Load lexer with content from `file` which can be a path or a file
|
|
like object.
|
|
"""
|
|
if isinstance(file, string_types):
|
|
with open(file) as f:
|
|
self.lexer.input(f.read())
|
|
else:
|
|
self.lexer.input(file.read())
|
|
|
|
def token(self):
|
|
"""
|
|
Token function. Contains 2 hacks:
|
|
1. Injects ';' into blocks where the last property
|
|
leaves out the ;
|
|
2. Strips out whitespace from nonsignificant locations
|
|
to ease parsing.
|
|
"""
|
|
if self.next_:
|
|
t = self.next_
|
|
self.next_ = None
|
|
return t
|
|
while True:
|
|
t = self.lexer.token()
|
|
if not t:
|
|
return t
|
|
if t.type == 't_ws' and (
|
|
self.pretok or (self.last
|
|
and self.last.type not in self.significant_ws)):
|
|
continue
|
|
self.pretok = False
|
|
if t.type == 't_bclose' and self.last and self.last.type not in ['t_bopen', 't_bclose'] and self.last.type != 't_semicolon' \
|
|
and not (hasattr(t, 'lexer') and (t.lexer.lexstate == 'escapequotes' or t.lexer.lexstate == 'escapeapostrophe')):
|
|
self.next_ = t
|
|
tok = lex.LexToken()
|
|
tok.type = 't_semicolon'
|
|
tok.value = ';'
|
|
tok.lineno = t.lineno
|
|
tok.lexpos = t.lexpos
|
|
self.last = tok
|
|
self.lexer.in_property_decl = False
|
|
return tok
|
|
self.last = t
|
|
break
|
|
return t
|