Unicode!
Input text can be a byte or unicode string. And the output text is always unicode. The callback method arguments are also always unicode (unless it's int). The text that is returned from the callback methods can be a byte or unicode string. Note that everything is UTF-8.
This commit is contained in:
parent
032ca130a7
commit
ef82a13d5f
|
@ -12,6 +12,8 @@ News/Changelog
|
|||
nothing.
|
||||
- Removed trailing forward slashes from MANIFEST.in. These cause trouble on Windows.
|
||||
([#12][]/[#13][])
|
||||
- Input text now be a byte or unicode string. Callback method arguments are
|
||||
always unicode. The returned text in a callback method can be a byte or unicode string
|
||||
|
||||
[#12]: https://github.com/FSX/misaka/pull/12
|
||||
[#13]: https://github.com/FSX/misaka/pull/13
|
||||
|
|
665
src/misaka.c
665
src/misaka.c
File diff suppressed because it is too large
Load Diff
|
@ -40,6 +40,12 @@ TABLE_ALIGNMASK = 3 # MKD_TABLE_ALIGNMASK
|
|||
TABLE_HEADER = 4 # MKD_TABLE_HEADER
|
||||
|
||||
|
||||
cdef char* _unicode_to_bytes(unicode text):
|
||||
cdef bytes py_string = text.encode('UTF-8', 'strict')
|
||||
cdef char *c_string = py_string
|
||||
return c_string
|
||||
|
||||
|
||||
def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
|
||||
"""Convert markdown text to (X)HTML::
|
||||
|
||||
|
@ -53,8 +59,11 @@ def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
|
|||
"""
|
||||
|
||||
# Convert string
|
||||
cdef bytes py_string = text.encode('UTF-8', 'strict')
|
||||
cdef char *c_string = py_string
|
||||
cdef char *c_string
|
||||
if isinstance(text, unicode):
|
||||
c_string = _unicode_to_bytes(text)
|
||||
else:
|
||||
c_string = text
|
||||
|
||||
# Definitions
|
||||
cdef sundown.sd_callbacks callbacks
|
||||
|
@ -86,7 +95,7 @@ def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
|
|||
sundown.bufrelease(ob)
|
||||
ob = sb
|
||||
|
||||
# Return a string and release buffers
|
||||
# Return a unicode string and release buffers
|
||||
try:
|
||||
return (<char *> ob.data)[:ob.size].decode('UTF-8', 'strict')
|
||||
finally:
|
||||
|
@ -139,8 +148,13 @@ cdef class BaseRenderer:
|
|||
cdef void **source = <void **> &wrapper.callback_funcs
|
||||
cdef void **dest = <void **> &self.callbacks
|
||||
|
||||
cdef unicode method_name
|
||||
for i from 0 <= i < <int> wrapper.method_count by 1:
|
||||
if hasattr(self, wrapper.method_names[i]):
|
||||
# In Python 3 ``wrapper.method_names[i]`` is a byte string.
|
||||
# This means hasattr can't find any method in the renderer, so
|
||||
# ``wrapper.method_names[i]`` is converted to a normal string first.
|
||||
method_name = wrapper.method_names[i].decode('utf-8')
|
||||
if hasattr(self, method_name):
|
||||
dest[i] = source[i]
|
||||
|
||||
def setup(self):
|
||||
|
@ -205,8 +219,11 @@ cdef class Markdown:
|
|||
text = self.renderer.preprocess(text)
|
||||
|
||||
# Convert string
|
||||
cdef bytes py_string = text.encode('UTF-8', 'strict')
|
||||
cdef char *c_string = py_string
|
||||
cdef char *c_string
|
||||
if isinstance(text, unicode):
|
||||
c_string = _unicode_to_bytes(text)
|
||||
else:
|
||||
c_string = text
|
||||
|
||||
# Buffers
|
||||
cdef sundown.buf *ib = sundown.bufnew(128)
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#define PROCESS_SPAN(method_name, ...) {\
|
||||
struct renderopt *opt = opaque;\
|
||||
PyObject *ret = PyObject_CallMethodObjArgs(\
|
||||
(PyObject *) opt->self, Py_BuildValue("s", method_name),\
|
||||
(PyObject *) opt->self, PyUnicode_FromString(method_name),\
|
||||
__VA_ARGS__);\
|
||||
if (ret == NULL || ret == Py_None) {\
|
||||
PyObject *r_ex = PyErr_Occurred();\
|
||||
|
@ -31,7 +31,7 @@
|
|||
#define PROCESS_BLOCK(method_name, ...) {\
|
||||
struct renderopt *opt = opaque;\
|
||||
PyObject *ret = PyObject_CallMethodObjArgs(\
|
||||
(PyObject *) opt->self, Py_BuildValue("s", method_name),\
|
||||
(PyObject *) opt->self, PyUnicode_FromString(method_name),\
|
||||
__VA_ARGS__);\
|
||||
if (ret == NULL || ret == Py_None) {\
|
||||
PyObject *r_ex = PyErr_Occurred();\
|
||||
|
@ -48,7 +48,7 @@
|
|||
}
|
||||
|
||||
|
||||
#define PY_STR(b) (b != NULL ? Py_BuildValue("s#", b->data, (int) b->size) : Py_None)
|
||||
#define PY_STR(b) (b != NULL ? PyUnicode_FromStringAndSize((const char *) b->data, (int) b->size) : Py_None)
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define PY_INT(i) PyLong_FromLong(i)
|
||||
|
|
Loading…
Reference in New Issue