Input text can be a byte string or a unicode string; the output text is
always unicode. The callback method arguments are also always unicode
(unless they are integers). The text returned from the callback methods
can be a byte string or a unicode string.

Note that everything is UTF-8.
This commit is contained in:
Frank Smit 2012-03-28 22:03:31 +02:00
parent 032ca130a7
commit ef82a13d5f
4 changed files with 410 additions and 292 deletions

View File

@ -12,6 +12,8 @@ News/Changelog
nothing.
- Removed trailing forward slashes from MANIFEST.in. These cause trouble on Windows.
([#12][]/[#13][])
- Input text can now be a byte or unicode string. Callback method arguments are
always unicode. The returned text in a callback method can be a byte or unicode string.
[#12]: https://github.com/FSX/misaka/pull/12
[#13]: https://github.com/FSX/misaka/pull/13

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,12 @@ TABLE_ALIGNMASK = 3 # MKD_TABLE_ALIGNMASK
TABLE_HEADER = 4 # MKD_TABLE_HEADER
# Encode a unicode string as UTF-8 (strict error handling) and return a C
# ``char *`` obtained from the encoded bytes object.
#
# NOTE(review): the returned pointer is taken from the local ``py_string``
# object. Nothing visible here keeps that object alive once this function
# returns, so the pointer looks like it may dangle -- confirm against
# Cython's lifetime rules for a ``char *`` coerced from ``bytes``.
cdef char* _unicode_to_bytes(unicode text):
cdef bytes py_string = text.encode('UTF-8', 'strict')
# Coerce the bytes object to a C char pointer.
cdef char *c_string = py_string
return c_string
def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
"""Convert markdown text to (X)HTML::
@ -53,8 +59,11 @@ def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
"""
# Convert string
cdef bytes py_string = text.encode('UTF-8', 'strict')
cdef char *c_string = py_string
cdef char *c_string
if isinstance(text, unicode):
c_string = _unicode_to_bytes(text)
else:
c_string = text
# Definitions
cdef sundown.sd_callbacks callbacks
@ -86,7 +95,7 @@ def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
sundown.bufrelease(ob)
ob = sb
# Return a string and release buffers
# Return a unicode string and release buffers
try:
return (<char *> ob.data)[:ob.size].decode('UTF-8', 'strict')
finally:
@ -139,8 +148,13 @@ cdef class BaseRenderer:
cdef void **source = <void **> &wrapper.callback_funcs
cdef void **dest = <void **> &self.callbacks
cdef unicode method_name
for i from 0 <= i < <int> wrapper.method_count by 1:
if hasattr(self, wrapper.method_names[i]):
# In Python 3 ``wrapper.method_names[i]`` is a byte string.
# This means hasattr can't find any method in the renderer, so
# ``wrapper.method_names[i]`` is converted to a normal string first.
method_name = wrapper.method_names[i].decode('utf-8')
if hasattr(self, method_name):
dest[i] = source[i]
def setup(self):
@ -205,8 +219,11 @@ cdef class Markdown:
text = self.renderer.preprocess(text)
# Convert string
cdef bytes py_string = text.encode('UTF-8', 'strict')
cdef char *c_string = py_string
cdef char *c_string
if isinstance(text, unicode):
c_string = _unicode_to_bytes(text)
else:
c_string = text
# Buffers
cdef sundown.buf *ib = sundown.bufnew(128)

View File

@ -10,7 +10,7 @@
#define PROCESS_SPAN(method_name, ...) {\
struct renderopt *opt = opaque;\
PyObject *ret = PyObject_CallMethodObjArgs(\
(PyObject *) opt->self, Py_BuildValue("s", method_name),\
(PyObject *) opt->self, PyUnicode_FromString(method_name),\
__VA_ARGS__);\
if (ret == NULL || ret == Py_None) {\
PyObject *r_ex = PyErr_Occurred();\
@ -31,7 +31,7 @@
#define PROCESS_BLOCK(method_name, ...) {\
struct renderopt *opt = opaque;\
PyObject *ret = PyObject_CallMethodObjArgs(\
(PyObject *) opt->self, Py_BuildValue("s", method_name),\
(PyObject *) opt->self, PyUnicode_FromString(method_name),\
__VA_ARGS__);\
if (ret == NULL || ret == Py_None) {\
PyObject *r_ex = PyErr_Occurred();\
@ -48,7 +48,7 @@
}
#define PY_STR(b) (b != NULL ? Py_BuildValue("s#", b->data, (int) b->size) : Py_None)
#define PY_STR(b) (b != NULL ? PyUnicode_FromStringAndSize((const char *) b->data, (int) b->size) : Py_None)
#if PY_MAJOR_VERSION >= 3
#define PY_INT(i) PyLong_FromLong(i)