510 lines
13 KiB
Python
Raw Normal View History

2012-01-28 14:52:09 +00:00
"""
Lexer for LESSCSS.
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
http://www.dabeaz.com/ply/ply.html
http://www.w3.org/TR/CSS21/grammar.html#scanner
http://lesscss.org/#docs
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
Copyright (c)
See LICENSE for details.
<jtm@robot.is>
"""
import re
import ply.lex as lex
2014-02-16 17:48:07 +02:00
from six import string_types
2012-01-28 14:52:09 +00:00
from lesscpy.lib import dom
from lesscpy.lib import css
from lesscpy.lib import reserved
2012-01-28 14:52:09 +00:00
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
class LessLexer:
    # Lexer states (all inclusive, so global rules still apply) used to
    # scope rules to: parenthesized expressions, escaped strings (~"..."),
    # interpolated strings, selector interpolation, media queries and
    # @import statements.
    states = (
        ('parn', 'inclusive'),
        ('escapequotes', 'inclusive'),
        ('escapeapostrophe', 'inclusive'),
        ('istringquotes', 'inclusive'),
        ('istringapostrophe', 'inclusive'),
        ('iselector', 'inclusive'),
        ('mediaquery', 'inclusive'),
        ('import', 'inclusive'),
    )
    # Single characters handed to the parser verbatim as literal tokens.
    literals = '<>=%!/*-+&'
    tokens = [
        'css_ident',
        'css_dom',
        'css_class',
        'css_id',
        'css_property',
        'css_vendor_property',
        'css_comment',
        'css_string',
        'css_color',
        'css_filter',
        'css_number',
        'css_important',
        'css_vendor_hack',
        'css_uri',
        'css_ms_filter',
        'css_media_type',
        'css_media_feature',
        't_and',
        't_not',
        't_only',
        'less_variable',
        'less_comment',
        'less_open_format',
        'less_when',
        'less_and',
        'less_not',
        't_ws',
        't_popen',
        't_pclose',
        't_semicolon',
        't_tilde',
        't_colon',
        't_comma',
        't_eopen',
        't_eclose',
        't_isopen',
        't_isclose',
        't_bopen',
        't_bclose',
    ]
    # Reserved at-keyword token types (deduplicated) extend the list.
    tokens += list(set(reserved.tokens.values()))
    # Tokens with significant following whitespace
    significant_ws = {
        'css_class',
        'css_id',
        'css_dom',
        'css_property',
        'css_vendor_property',
        'css_ident',
        'css_number',
        'css_color',
        'css_media_type',
        'css_filter',
        'less_variable',
        't_and',
        't_not',
        't_only',
        '&',
    }
    significant_ws.update(reserved.tokens.values())
2012-01-28 14:52:09 +00:00
def __init__(self):
2013-07-19 11:21:51 +02:00
self.build(reflags=re.UNICODE | re.IGNORECASE)
2012-02-19 20:38:19 +00:00
self.last = None
2013-07-19 11:53:00 +02:00
self.next_ = None
2012-03-25 16:34:38 +00:00
self.pretok = True
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    def t_css_filter(self, t):
        (r'\[[^\]]*\]'
         '|(not|lang|nth-[a-z\-]+)\(.+\)'
         '|and[ \t]\([^><\{]+\)')
        # Attribute selectors '[...]', functional pseudo-classes such as
        # not(...)/lang(...)/nth-*(...), and "and (...)" groups.
        return t

    def t_css_ms_filter(self, t):
        r'(?:progid:|DX\.)[^;\(]*'
        # Legacy IE filter values, e.g. progid:DXImageTransform...
        return t
2013-07-19 11:21:51 +02:00
    def t_t_bopen(self, t):
        r'\{'
        return t

    def t_t_bclose(self, t):
        r'\}'
        return t

    def t_t_colon(self, t):
        r':'
        return t

    def t_t_comma(self, t):
        r','
        # A comma ends any property declaration in progress.
        t.lexer.in_property_decl = False
        return t

    def t_css_number(self, t):
        r'-?(\d*\.\d+|\d+)(s|%|in|ex|[ecm]m|p[txc]|deg|g?rad|ms?|k?hz|dpi|dpcm|dppx)?'
        # Numeric value with an optional CSS unit suffix.
        return t
2012-01-28 14:52:09 +00:00
    def t_css_ident(self, t):
        (r'([\-\.\#]?'
         '([_a-z]'
         '|[\200-\377]'
         '|\\\[0-9a-f]{1,6}'
         '|\\\[^\s\r\n0-9a-f])'
         '([_a-z0-9\-]'
         '|[\200-\377]'
         '|\\\[0-9a-f]{1,6}'
         '|\\\[^\s\r\n0-9a-f])*)'
         '|\.')
        # Catch-all identifier rule: the matched text is re-classified
        # below by its first character and by lookups in the css/dom
        # tables, since classes, ids, colors, properties and DOM element
        # names all look alike at the regex level.
        v = t.value.strip()
        c = v[0]
        if c == '.':
            # In some cases, only the '.' can be marked as CSS class.
            #
            # Example: .@{name}
            #
            t.type = 'css_class'
            if t.lexer.lexstate != "iselector":
                # Selector-chaining case (a.b.c), we are already in state 'iselector'
                t.lexer.push_state("iselector")
        elif c == '#':
            t.type = 'css_id'
            # '#' + 3 or 6 hex digits is a color, not an id.
            if len(v) in [4, 7]:
                try:
                    int(v[1:], 16)
                    t.type = 'css_color'
                except ValueError:
                    pass
        elif v == 'when':
            t.type = 'less_when'
        elif v == 'and':
            t.type = 'less_and'
        elif v == 'not':
            t.type = 'less_not'
        elif v in css.propertys:
            t.type = 'css_property'
            t.lexer.in_property_decl = True
        elif (v in dom.elements or v.lower() in dom.elements) and not t.lexer.in_property_decl:
            # DOM elements can't be part of property declarations, avoids ambiguity between 'rect' DOM
            # element and rect() CSS function.
            t.type = 'css_dom'
        elif c == '-':
            # Unknown '-'-prefixed identifier: treat as a vendor property.
            t.type = 'css_vendor_property'
            t.lexer.in_property_decl = True
        t.value = v
        return t
2013-07-19 11:21:51 +02:00
    def t_iselector_less_variable(self, t):
        r'@\{[^@\}]+\}'
        # Interpolated variable inside a selector, e.g. ".span_@{num}".
        return t

    def t_iselector_t_eclose(self, t):
        r'"|\''
        # Can only happen if iselector state is on top of estring state.
        #
        # Example: @item: ~".col-xs-@{index}";
        #
        t.lexer.pop_state()
        return t

    def t_iselector_css_filter(self, t):
        (r'\[[^\]]*\]'
         '|(not|lang|nth-[a-z\-]+)\(.+\)'
         '|and[ \t]\([^><\{]+\)')
        # TODO/FIXME(saschpe): Only needs to be redifined in state 'iselector' so that
        # the following css_class doesn't catch everything.
        return t

    def t_iselector_css_class(self, t):
        r'[_a-z0-9\-]+'
        # The first part of CSS class was tokenized by t_css_ident() already.
        # Here we gather up the any LESS variable.
        #
        # Example: .span_@{num}_small
        #
        return t

    def t_iselector_t_ws(self, t):
        r'[ \t\f\v]+'
        # Whitespace ends the interpolated selector.
        #
        # Example: .span_@{num}
        #
        t.lexer.pop_state()
        t.value = ' '
        return t

    def t_iselector_t_bopen(self, t):
        r'\{'
        # Opening a block also ends the interpolated selector.
        t.lexer.pop_state()
        return t

    def t_iselector_t_colon(self, t):
        r':'
        # A colon (e.g. a pseudo-class) ends the interpolated selector.
        t.lexer.pop_state()
        return t
    def t_mediaquery_t_not(self, t):
        r'not'
        return t

    def t_mediaquery_t_only(self, t):
        r'only'
        return t

    def t_mediaquery_t_and(self, t):
        r'and'
        return t

    def t_mediaquery_t_popen(self, t):
        r'\('
        # Redefine global t_popen to avoid pushing state 'parn'
        return t

    @lex.TOKEN('|'.join(css.media_types))
    def t_mediaquery_css_media_type(self, t):
        # Known media types taken from lesscpy.lib.css.media_types.
        return t

    @lex.TOKEN('|'.join(css.media_features))
    def t_mediaquery_css_media_feature(self, t):
        # Known media features taken from lesscpy.lib.css.media_features.
        return t

    def t_mediaquery_t_bopen(self, t):
        r'\{'
        # An ordinary media query ends when its block is opened.
        t.lexer.pop_state()
        return t

    def t_mediaquery_t_semicolon(self, t):
        r';'
        # This can happen only as part of a CSS import statement. The
        # "mediaquery" state is reused there. Ordinary media queries always
        # end at '{', i.e. when a block is opened.
        t.lexer.pop_state()  # state mediaquery
        # We have to pop the 'import' state here because we already ate the
        # t_semicolon and won't trigger t_import_t_semicolon.
        t.lexer.pop_state()  # state import
        return t

    @lex.TOKEN('|'.join(css.media_types))
    def t_import_css_media_type(self, t):
        # Example: @import url("bar.css") handheld and (max-width: 500px);
        # Alternatively, we could use a lookahead "if not ';'" after the URL
        # part of the @import statement...
        t.lexer.push_state("mediaquery")
        return t

    def t_import_t_semicolon(self, t):
        r';'
        # End of the @import statement.
        t.lexer.pop_state()
        return t
    def t_less_variable(self, t):
        r'@@?[\w-]+|@\{[^@\}]+\}'
        # LESS variable ('@var'), indirect variable ('@@var') or
        # interpolation ('@{var}').  Reserved at-keywords (per
        # reserved.tokens) are re-typed, and @media/@import additionally
        # switch the lexer into their dedicated states.
        v = t.value.lower()
        if v in reserved.tokens:
            t.type = reserved.tokens[v]
            if t.type == "css_media":
                t.lexer.push_state("mediaquery")
            elif t.type == "css_import":
                t.lexer.push_state("import")
        return t
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    def t_css_color(self, t):
        r'\#[0-9]([0-9a-f]{5}|[0-9a-f]{2})'
        # Hex colors whose first digit is numeric (e.g. #1a2b3c).  Colors
        # starting with a hex letter (e.g. #fff) are matched by the
        # earlier t_css_ident rule, whose '#' branch promotes them to
        # 'css_color' itself.
        return t
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    def t_parn_css_uri(self, t):
        (r'data:[^\)]+'
         '|(([a-z]+://)?'
         '('
         '(/?[\.a-z:]+[\w\.:]*[\\/][\\/]?)+'
         '|([a-z][\w\.\-]+(\.[a-z0-9]+))'
         '(\#[a-z]+)?)'
         ')+')
        # Unquoted URI inside 'url(...)': data: URIs, path-like values and
        # scheme-qualified URLs (only active in state 'parn').
        return t

    def t_parn_css_ident(self, t):
        (r'(([_a-z]'
         '|[\200-\377]'
         '|\\\[0-9a-f]{1,6}'
         '|\\\[^\r\n\s0-9a-f])'
         '([_a-z0-9\-]|[\200-\377]'
         '|\\\[0-9a-f]{1,6}'
         '|\\\[^\r\n\s0-9a-f])*)')
        # Plain identifier inside parentheses; none of the class/id/color
        # re-classification from the global t_css_ident applies here.
        return t
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    def t_newline(self, t):
        r'[\n\r]+'
        # Track line numbers; newlines themselves produce no token.
        t.lexer.lineno += t.value.count('\n')

    def t_css_comment(self, t):
        r'(/\*(.|\n|\r)*?\*/)'
        # CSS block comments are discarded (no token returned).
        t.lexer.lineno += t.value.count('\n')
        pass

    def t_less_comment(self, t):
        r'//.*'
        # LESS line comments are discarded.
        pass

    def t_css_important(self, t):
        r'!\s*important'
        # Normalize any whitespace between '!' and 'important'.
        t.value = '!important'
        return t
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    def t_t_ws(self, t):
        r'[ \t\f\v]+'
        # Collapse any run of horizontal whitespace to a single space.
        t.value = ' '
        return t

    def t_t_popen(self, t):
        r'\('
        t.lexer.push_state('parn')
        return t

    def t_less_open_format(self, t):
        r'%\('
        # Format call '%(' — its arguments are lexed in state 'parn'.
        t.lexer.push_state('parn')
        return t

    def t_parn_t_pclose(self, t):
        r'\)'
        # Closing paren leaves state 'parn'.
        t.lexer.pop_state()
        return t

    def t_t_pclose(self, t):
        r'\)'
        return t

    def t_t_semicolon(self, t):
        r';'
        # A semicolon ends any property declaration in progress.
        t.lexer.in_property_decl = False
        return t

    def t_t_eopen(self, t):
        r'~"|~\''
        # Escaped-string opener: ~"..." or ~'...'; the body is lexed in
        # the matching escape* state until the closing quote.
        if t.value[1] == '"':
            t.lexer.push_state('escapequotes')
        elif t.value[1] == '\'':
            t.lexer.push_state('escapeapostrophe')
        return t

    def t_t_tilde(self, t):
        r'~'
        return t

    def t_escapequotes_less_variable(self, t):
        r'@\{[^@"\}]+\}'
        # Interpolated variable inside a ~"..." escaped string.
        return t

    def t_escapeapostrophe_less_variable(self, t):
        r'@\{[^@\'\}]+\}'
        # Interpolated variable inside a ~'...' escaped string.
        return t

    def t_escapequotes_t_eclose(self, t):
        r'"'
        t.lexer.pop_state()
        return t

    def t_escapeapostrophe_t_eclose(self, t):
        r'\''
        t.lexer.pop_state()
        return t
2013-07-19 11:21:51 +02:00
2012-02-19 20:38:19 +00:00
    def t_css_string(self, t):
        r'"[^"@]*"|\'[^\'@]*\''
        # Quoted string containing no '@' (i.e. no interpolation).
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_t_isopen(self, t):
        r'"|\''
        # Opening quote of a string that does contain interpolation; its
        # body is lexed in the matching istring* state.
        if t.value[0] == '"':
            t.lexer.push_state('istringquotes')
        elif t.value[0] == '\'':
            t.lexer.push_state('istringapostrophe')
        return t

    def t_istringquotes_less_variable(self, t):
        r'@\{[^@"\}]+\}'
        return t

    def t_istringapostrophe_less_variable(self, t):
        r'@\{[^@\'\}]+\}'
        return t

    def t_istringapostrophe_css_string(self, t):
        r'[^\'@]+'
        # Literal run between interpolations in a '...'-quoted string.
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_istringquotes_css_string(self, t):
        r'[^"@]+'
        # Literal run between interpolations in a "..."-quoted string.
        t.lexer.lineno += t.value.count('\n')
        return t

    def t_istringquotes_t_isclose(self, t):
        r'"'
        t.lexer.pop_state()
        return t

    def t_istringapostrophe_t_isclose(self, t):
        r'\''
        t.lexer.pop_state()
        return t
2012-01-28 14:52:09 +00:00
# Error handling rule
def t_error(self, t):
2013-07-19 11:21:51 +02:00
raise SyntaxError("Illegal character '%s' line %d" %
(t.value[0], t.lexer.lineno))
2012-01-28 14:52:09 +00:00
t.lexer.skip(1)
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    # Build the lexer
    def build(self, **kwargs):
        """Create the underlying PLY lexer from this class's rules.

        Keyword arguments are passed straight through to ply.lex.lex().
        """
        self.lexer = lex.lex(module=self, **kwargs)
        # State-tracking variable, see http://www.dabeaz.com/ply/ply.html#ply_nn18
        self.lexer.in_property_decl = False
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
def file(self, filename):
2012-03-24 18:33:19 +00:00
"""
Lex file.
"""
2012-01-28 14:52:09 +00:00
with open(filename) as f:
self.lexer.input(f.read())
2012-02-19 20:38:19 +00:00
return self
2013-07-19 11:21:51 +02:00
2014-02-16 15:54:00 +02:00
def input(self, file):
2012-03-24 18:33:19 +00:00
"""
2014-02-16 15:54:00 +02:00
Load lexer with content from `file` which can be a path or a file
like object.
2012-03-24 18:33:19 +00:00
"""
2014-02-16 17:48:07 +02:00
if isinstance(file, string_types):
2014-02-16 15:54:00 +02:00
with open(file) as f:
self.lexer.input(f.read())
else:
self.lexer.input(file.read())
2013-07-19 11:21:51 +02:00
2012-01-28 14:52:09 +00:00
    def token(self):
        """
        Token function. Contains 2 hacks:
            1. Injects ';' into blocks where the last property
               leaves out the ;
            2. Strips out whitespace from nonsignificant locations
               to ease parsing.
        """
        # Serve a token pushed back by the ';'-injection below, if any.
        if self.next_:
            t = self.next_
            self.next_ = None
            return t
        while True:
            t = self.lexer.token()
            if not t:
                return t
            # Drop whitespace seen before the first real token, or after
            # any token whose type is not in significant_ws.
            if t.type == 't_ws' and (
                    self.pretok or (self.last
                                    and self.last.type not in self.significant_ws)):
                continue
            self.pretok = False
            # '}' directly after a property missing its ';' (and not while
            # inside an escaped string): emit a synthetic ';' now and hold
            # the '}' in self.next_ for the following call.
            if t.type == 't_bclose' and self.last and self.last.type not in ['t_bopen', 't_bclose'] and self.last.type != 't_semicolon' \
                    and not (hasattr(t, 'lexer') and (t.lexer.lexstate == 'escapequotes' or t.lexer.lexstate == 'escapeapostrophe')):
                self.next_ = t
                tok = lex.LexToken()
                tok.type = 't_semicolon'
                tok.value = ';'
                tok.lineno = t.lineno
                tok.lexpos = t.lexpos
                self.last = tok
                self.lexer.in_property_decl = False
                return tok
            self.last = t
            break
        return t