Use CFFI for Hoedown binding.

This commit is contained in:
Frank Smit 2015-06-02 15:03:44 +02:00
parent 21c06eb7ec
commit a75a5ec9df
49 changed files with 6192 additions and 9999 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@ __pycache__
_build
build
dist
.eggs
*.egg-info
*.sublime-*
*.pyc

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "vendor/sundown"]
path = vendor/sundown
url = git://github.com/FSX/sundown.git

View File

@ -1,6 +1,11 @@
News/Changelog
==============
## 2.0.0 (????-??-??)
- Use CFFI instead of Cython.
## 1.0.3 (2012-11-??)
- `scripts/misaka`: Read stdin when no file is specified. ([#22][])

View File

@ -1,4 +1,4 @@
Copyright (C) 2011 by Frank Smit <frank@61924.nl>
Copyright (C) 2011-2015 by Frank Smit <frank@61924.nl>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -3,18 +3,16 @@ Misaka
.. image:: https://secure.travis-ci.org/FSX/misaka.png?branch=master
The Python binding for Sundown_, a markdown parsing library.
A CFFI binding for Hoedown_, a markdown parsing library.
Documentation can be found at: http://misaka.61924.nl/
.. _Sundown: https://github.com/vmg/sundown
.. _Hoedown: https://github.com/hoedown/hoedown
Installation
------------
Cython is only needed to compile .pyx file.
With pip::
pip install misaka

284
build_ffi.py Normal file
View File

@ -0,0 +1,284 @@
import cffi
# Block-level extensions
EXT_TABLES = (1 << 0)
EXT_FENCED_CODE = (1 << 1)
EXT_FOOTNOTES = (1 << 2)
# Span-level extensions
EXT_AUTOLINK = (1 << 3)
EXT_STRIKETHROUGH = (1 << 4)
EXT_UNDERLINE = (1 << 5)
EXT_HIGHLIGHT = (1 << 6)
EXT_QUOTE = (1 << 7)
EXT_SUPERSCRIPT = (1 << 8)
EXT_MATH = (1 << 9)
# Other flags
EXT_NO_INTRA_EMPHASIS = (1 << 11)
EXT_SPACE_HEADERS = (1 << 12)
EXT_MATH_EXPLICIT = (1 << 13)
# Negative flags
EXT_DISABLE_INDENTED_CODE = (1 << 14)
# List flags
LIST_ORDERED = (1 << 0)
LI_BLOCK = (1 << 1) # <li> containing block data
# Table flags
TABLE_ALIGN_LEFT = 1
TABLE_ALIGN_RIGHT = 2
TABLE_ALIGN_CENTER = 3
TABLE_ALIGNMASK = 3
TABLE_HEADER = 4
# HTML flags
HTML_SKIP_HTML = (1 << 0)
HTML_ESCAPE = (1 << 1)
HTML_HARD_WRAP = (1 << 2)
HTML_USE_XHTML = (1 << 3)
# Autolink types
AUTOLINK_NONE = 1 # Used internally when it is not an autolink
AUTOLINK_NORMAL = 2 # Normal http/http/ftp/mailto/etc link
AUTOLINK_EMAIL = 3 # E-mail link without explit mailto:
ffi = cffi.FFI()
ffi.set_source(
'misaka._hoedown',
"""\
#include "extra.h"
#include "hoedown/buffer.h"
#include "hoedown/document.h"
#include "hoedown/html.h"
""",
sources=(
'src/extra.c',
'src/hoedown/version.c',
'src/hoedown/stack.c',
'src/hoedown/html_smartypants.c',
'src/hoedown/html_blocks.c',
'src/hoedown/html.c',
'src/hoedown/escape.c',
'src/hoedown/document.c',
'src/hoedown/buffer.c',
'src/hoedown/autolink.c',
),
include_dirs=('src',))
# NOTE: The constants are refined here, because CFFI
# doesn't parse the bitwise left-shift (<<).
ffi.cdef("""\
// --------------------------
// --- hoedown/document.h ---
// --------------------------
typedef enum hoedown_extensions {{
/* block-level extensions */
HOEDOWN_EXT_TABLES = {},
HOEDOWN_EXT_FENCED_CODE = {},
HOEDOWN_EXT_FOOTNOTES = {},
HOEDOWN_EXT_AUTOLINK = {},
HOEDOWN_EXT_STRIKETHROUGH = {},
HOEDOWN_EXT_UNDERLINE = {},
HOEDOWN_EXT_HIGHLIGHT = {},
HOEDOWN_EXT_QUOTE = {},
HOEDOWN_EXT_SUPERSCRIPT = {},
HOEDOWN_EXT_MATH = {},
HOEDOWN_EXT_NO_INTRA_EMPHASIS = {},
HOEDOWN_EXT_SPACE_HEADERS = {},
HOEDOWN_EXT_MATH_EXPLICIT = {},
HOEDOWN_EXT_DISABLE_INDENTED_CODE = {}
}} hoedown_extensions;
typedef enum hoedown_list_flags {{
HOEDOWN_LIST_ORDERED = {},
HOEDOWN_LI_BLOCK = {}
}} hoedown_list_flags;
typedef enum hoedown_table_flags {{
HOEDOWN_TABLE_ALIGN_LEFT = {},
HOEDOWN_TABLE_ALIGN_RIGHT = {},
HOEDOWN_TABLE_ALIGN_CENTER = {},
HOEDOWN_TABLE_ALIGNMASK = {},
HOEDOWN_TABLE_HEADER = {}
}} hoedown_table_flags;
// ----------------------
// --- hoedown/html.h ---
// ----------------------
typedef enum hoedown_html_flags {{
HOEDOWN_HTML_SKIP_HTML = {},
HOEDOWN_HTML_ESCAPE = {},
HOEDOWN_HTML_HARD_WRAP = {},
HOEDOWN_HTML_USE_XHTML = {}
}} hoedown_html_flags;
""".format(
EXT_TABLES,
EXT_FENCED_CODE,
EXT_FOOTNOTES,
EXT_AUTOLINK,
EXT_STRIKETHROUGH,
EXT_UNDERLINE,
EXT_HIGHLIGHT,
EXT_QUOTE,
EXT_SUPERSCRIPT,
EXT_MATH,
EXT_NO_INTRA_EMPHASIS,
EXT_SPACE_HEADERS,
EXT_MATH_EXPLICIT,
EXT_DISABLE_INDENTED_CODE,
LIST_ORDERED,
LI_BLOCK,
TABLE_ALIGN_LEFT,
TABLE_ALIGN_RIGHT,
TABLE_ALIGN_CENTER,
TABLE_ALIGNMASK,
TABLE_HEADER,
HTML_SKIP_HTML,
HTML_ESCAPE,
HTML_HARD_WRAP,
HTML_USE_XHTML))
ffi.cdef("""\
// ------------------------
// --- hoedown/buffer.h ---
// ------------------------
typedef void *(*hoedown_realloc_callback)(void *, size_t);
typedef void (*hoedown_free_callback)(void *);
struct hoedown_buffer {
uint8_t *data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
hoedown_realloc_callback data_realloc;
hoedown_free_callback data_free;
hoedown_free_callback buffer_free;
};
typedef struct hoedown_buffer hoedown_buffer;
void *hoedown_malloc(size_t size);
hoedown_buffer *hoedown_buffer_new(size_t unit);
void hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz);
void hoedown_buffer_puts(hoedown_buffer *buf, const char *str);
void hoedown_buffer_free(hoedown_buffer *buf);
// --------------------------
// --- hoedown/document.h ---
// --------------------------
// NOTE: See earlier ff.cdef() for document.h's constants.
typedef enum hoedown_autolink_type {
HOEDOWN_AUTOLINK_NONE, /* used internally when it is not an autolink*/
HOEDOWN_AUTOLINK_NORMAL, /* normal http/http/ftp/mailto/etc link */
HOEDOWN_AUTOLINK_EMAIL /* e-mail link without explit mailto: */
} hoedown_autolink_type;
struct hoedown_document;
typedef struct hoedown_document hoedown_document;
struct hoedown_renderer_data {
void *opaque;
};
typedef struct hoedown_renderer_data hoedown_renderer_data;
/* hoedown_renderer - functions for rendering parsed data */
struct hoedown_renderer {
/* state object */
void *opaque;
/* block level callbacks - NULL skips the block */
void (*blockcode)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data);
void (*blockquote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*header)(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data);
void (*hrule)(hoedown_buffer *ob, const hoedown_renderer_data *data);
void (*list)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*listitem)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*paragraph)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_header)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_body)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_row)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_cell)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data);
void (*footnotes)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*footnote_def)(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data);
void (*blockhtml)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* span level callbacks - NULL or return 0 prints the span verbatim */
int (*autolink)(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data);
int (*codespan)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
int (*double_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*underline)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*highlight)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*quote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*image)(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data);
int (*linebreak)(hoedown_buffer *ob, const hoedown_renderer_data *data);
int (*link)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data);
int (*triple_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*strikethrough)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*superscript)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*footnote_ref)(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data);
int (*math)(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data);
int (*raw_html)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* low level callbacks - NULL copies input directly into the output */
void (*entity)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
void (*normal_text)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* miscellaneous callbacks */
void (*doc_header)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
void (*doc_footer)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
};
typedef struct hoedown_renderer hoedown_renderer;
hoedown_document *hoedown_document_new(
const hoedown_renderer *renderer,
hoedown_extensions extensions,
size_t max_nesting
);
void hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
void hoedown_document_free(hoedown_document *doc);
// ----------------------
// --- hoedown/html.h ---
// ----------------------
// NOTE: See earlier ff.cdef() for html.h's constants.
typedef enum hoedown_html_tag {
HOEDOWN_HTML_TAG_NONE = 0,
HOEDOWN_HTML_TAG_OPEN,
HOEDOWN_HTML_TAG_CLOSE
} hoedown_html_tag;
hoedown_renderer *hoedown_html_renderer_new(
hoedown_html_flags render_flags,
int nesting_level
);
void hoedown_html_renderer_free(hoedown_renderer *renderer);
// ---------------
// --- extra.h ---
// ---------------
hoedown_renderer *null_renderer_new();
void null_renderer_free(hoedown_renderer *renderer);
""")
if __name__ == '__main__':
ffi.compile()

2
misaka/__init__.py Normal file
View File

@ -0,0 +1,2 @@
from .api import *
from .constants import *

250
misaka/api.py Normal file
View File

@ -0,0 +1,250 @@
from inspect import getmembers, ismethod
from ._hoedown import lib, ffi
__all__ = [
'html',
'Markdown',
'BaseRenderer',
'HtmlRenderer'
]
def html(text, extensions=0, render_flags=0):
ib = lib.hoedown_buffer_new(1024)
ob = lib.hoedown_buffer_new(64)
renderer = lib.hoedown_html_renderer_new(0, 0)
document = lib.hoedown_document_new(renderer, 0, 16);
lib.hoedown_buffer_puts(ib, text.encode('utf-8'))
lib.hoedown_document_render(document, ob, ib.data, ib.size);
lib.hoedown_buffer_free(ib);
lib.hoedown_document_free(document);
lib.hoedown_html_renderer_free(renderer);
try:
return ffi.string(ob.data, ob.size).decode('utf-8')
finally:
lib.hoedown_buffer_free(ob);
class Markdown:
def __init__(self, renderer):
# NOTE: Prevent the renderer from being garbage collected
self.renderer = renderer
def render(self, text):
ib = lib.hoedown_buffer_new(1024)
lib.hoedown_buffer_puts(ib, text.encode('utf-8'))
ob = lib.hoedown_buffer_new(64)
document = lib.hoedown_document_new(self.renderer.renderer, 0, 16);
lib.hoedown_document_render(document, ob, ib.data, ib.size);
lib.hoedown_buffer_free(ib);
lib.hoedown_document_free(document);
try:
return ffi.string(ob.data, ob.size).decode('utf-8')
finally:
lib.hoedown_buffer_free(ob);
callback_signatures = {
# block level callbacks - NULL skips the block
'blockcode': 'void (*blockcode)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data)',
'blockquote': 'void (*blockquote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'header': 'void (*header)(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)',
'hrule': 'void (*hrule)(hoedown_buffer *ob, const hoedown_renderer_data *data)',
'list': 'void (*list)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)',
'listitem': 'void (*listitem)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)',
'paragraph': 'void (*paragraph)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'table': 'void (*table)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'table_header': 'void (*table_header)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'table_body': 'void (*table_body)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'table_row': 'void (*table_row)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'table_cell': 'void (*table_cell)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data)',
'footnotes': 'void (*footnotes)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'footnote_def': 'void (*footnote_def)(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data)',
'blockhtml': 'void (*blockhtml)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)',
# span level callbacks - NULL or return 0 prints the span verbatim
'autolink': 'int (*autolink)(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data)',
'codespan': 'int (*codespan)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)',
'double_emphasis': 'int (*double_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'emphasis': 'int (*emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'underline': 'int (*underline)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'highlight': 'int (*highlight)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'quote': 'int (*quote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'image': 'int (*image)(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data)',
'linebreak': 'int (*linebreak)(hoedown_buffer *ob, const hoedown_renderer_data *data)',
'link': 'int (*link)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)',
'triple_emphasis': 'int (*triple_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'strikethrough': 'int (*strikethrough)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'superscript': 'int (*superscript)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)',
'footnote_ref': 'int (*footnote_ref)(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data)',
'math': 'int (*math)(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data)',
'raw_html': 'int (*raw_html)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)',
# low level callbacks - NULL copies input directly into the output
'entity': 'void (*entity)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);',
'normal_text': 'void (*normal_text)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);',
# miscellaneous callbacks
'doc_header': 'void (*doc_header)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);',
'doc_footer': 'void (*doc_footer)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);',
}
# TODO: Do this in Python:
# static hoedown_renderer *
# null_renderer_new()
# {
# hoedown_renderer *renderer;
# renderer = hoedown_malloc(sizeof(hoedown_renderer));
# memset(renderer, 0x0, sizeof(hoedown_renderer));
# return renderer;
# }
# static void
# null_renderer_free(hoedown_renderer *renderer)
# {
# free(renderer);
# }
class BaseRenderer:
def __init__(self):
# TODO: Make a null renderer.
self.renderer = None
self.set_callbacks()
def set_callbacks(self):
callbacks = []
for name, func in getmembers(self, predicate=ismethod):
signature = callback_signatures.get(name)
if signature is None:
continue
wrapper = getattr(self, '_w_' + name)
callback = ffi.callback(signature, wrapper)
callbacks.append(callback)
setattr(self.renderer, name, callback)
# Prevent callbacks from being garbage collected.
self._callbacks = callbacks
def _w_blockcode(self, ob, text, lang, data):
text = ffi.string(text.data, text.size).decode('utf-8')
lang = ffi.string(lang.data, lang.size).decode('utf-8')
result = self.blockcode(text, lang)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_blockquote(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.blockquote(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_header(self, ob, content, level, data):
content = ffi.string(content.data, content.size).decode('utf-8')
level = int(level)
result = self.header(content, level)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_hrule(self, ob, data):
result = self.hrule()
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
# flags: LIST_ORDERED, LI_BLOCK.
def _w_list(self, ob, content, flags, data):
content = ffi.string(content.data, content.size).decode('utf-8')
flags = int(flags)
result = self.list(content, flags)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
# flags: LIST_ORDERED, LI_BLOCK.
def _w_listitem(self, ob, content, flags, data):
content = ffi.string(content.data, content.size).decode('utf-8')
flags = int(flags)
result = self.listitem(content, flags)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_paragraph(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.paragraph(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_table(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.table(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_table_header(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.table_header(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_table_body(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.table_body(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_table_row(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.table_row(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_table_cell(self, ob, content, flags, data):
content = ffi.string(content.data, content.size).decode('utf-8')
flags = int(flags)
result = self.table_row(content, flags)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_footnotes(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.footnotes(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_footnote_def(self, ob, content, num, data):
content = ffi.string(content.data, content.size).decode('utf-8')
num = int(num)
result = self.footnote_def(content, num)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_blockhtml(self, ob, text, data):
text = ffi.string(text.data, text.size).decode('utf-8')
result = self.blockhtml(text)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
def _w_emphasis(self, ob, content, data):
content = ffi.string(content.data, content.size).decode('utf-8')
result = self.emphasis(content)
if result:
lib.hoedown_buffer_puts(ob, result.encode('utf-8'))
return 1
return 0
class HtmlRenderer(BaseRenderer):
def __init__(self):
self.renderer = lib.hoedown_html_renderer_new(0, 0)
self.set_callbacks()
def __del__(self):
lib.hoedown_html_renderer_free(self.renderer)

43
misaka/constants.py Normal file
View File

@ -0,0 +1,43 @@
# Block-level extensions
EXT_TABLES = (1 << 0)
EXT_FENCED_CODE = (1 << 1)
EXT_FOOTNOTES = (1 << 2)
# Span-level extensions
EXT_AUTOLINK = (1 << 3)
EXT_STRIKETHROUGH = (1 << 4)
EXT_UNDERLINE = (1 << 5)
EXT_HIGHLIGHT = (1 << 6)
EXT_QUOTE = (1 << 7)
EXT_SUPERSCRIPT = (1 << 8)
EXT_MATH = (1 << 9)
# Other flags
EXT_NO_INTRA_EMPHASIS = (1 << 11)
EXT_SPACE_HEADERS = (1 << 12)
EXT_MATH_EXPLICIT = (1 << 13)
# Negative flags
EXT_DISABLE_INDENTED_CODE = (1 << 14)
# List flags
LIST_ORDERED = (1 << 0)
LI_BLOCK = (1 << 1) # <li> containing block data
# Table flags
TABLE_ALIGN_LEFT = 1
TABLE_ALIGN_RIGHT = 2
TABLE_ALIGN_CENTER = 3
TABLE_ALIGNMASK = 3
TABLE_HEADER = 4
# HTML flags
HTML_SKIP_HTML = (1 << 0)
HTML_ESCAPE = (1 << 1)
HTML_HARD_WRAP = (1 << 2)
HTML_USE_XHTML = (1 << 3)
# Autolink types
AUTOLINK_NONE = 1 # Used internally when it is not an autolink
AUTOLINK_NORMAL = 2 # Normal http/http/ftp/mailto/etc link
AUTOLINK_EMAIL = 3 # E-mail link without explit mailto:

View File

@ -1,12 +1,7 @@
import os
import glob
import shutil
import os.path
try:
from setuptools import setup, Extension, Command
except ImportError:
from distutils.core import setup, Extension, Command
from setuptools import setup, Command
dirname = os.path.dirname(os.path.abspath(__file__))
@ -23,43 +18,13 @@ class BaseCommand(Command):
class CleanCommand(BaseCommand):
description = 'cleanup directories created by packaging and build processes'
def run(self):
for path in ['build', 'dist', 'misaka.egg-info', 'docs/_build']:
for path in ('build', 'dist', 'misaka.egg-info', 'docs/_build'):
if os.path.exists(path):
path = os.path.join(dirname, path)
print('removing %s' % path)
shutil.rmtree(path)
class CythonCommand(BaseCommand):
description = 'compile Cython files(s) into C file(s)'
def run(self):
try:
from Cython.Compiler.Main import compile
path = os.path.join(dirname, 'src', 'misaka.pyx')
print('compiling %s' % path)
compile(path)
except ImportError:
print('Cython is not installed. Please install Cython first.')
class VendorCommand(BaseCommand):
description = 'update Sundown files. Use `git submodule init`, '\
'`git submodule update` and `git submodule foreach git pull origin master`'\
' to the most recent files'
def run(self):
files = []
dest = os.path.join(dirname, 'src/sundown')
for path in ['vendor/sundown/src/*', 'vendor/sundown/html/*']:
files += glob.glob(os.path.join(dirname, path))
for path in files:
if os.path.exists(path):
print('copy %s -> %s' % (path, dest))
shutil.copy(path, dest)
class TestCommand(BaseCommand):
description = 'run unit tests'
def run(self):
@ -68,42 +33,29 @@ class TestCommand(BaseCommand):
setup(
name='misaka',
version='1.0.3',
description='The Python binding for Sundown, a markdown parsing library.',
version='2.0.0',
description='A CFFI binding for Hoedown, a markdown parsing library.',
author='Frank Smit',
author_email='frank@61924.nl',
url='http://misaka.61924.nl/',
url='https://github.com/FSX/misaka',
license='MIT',
long_description=open(os.path.join(dirname, 'README.rst')).read(),
scripts=['scripts/misaka'],
cmdclass={
'clean': CleanCommand,
'compile_cython': CythonCommand,
'update_vendor': VendorCommand,
'test': TestCommand
},
ext_modules=[Extension('misaka', [
'src/misaka.c',
'src/wrapper.c',
'src/sundown/stack.c',
'src/sundown/buffer.c',
'src/sundown/markdown.c',
'src/sundown/html.c',
'src/sundown/html_smartypants.c',
'src/sundown/houdini_href_e.c',
'src/sundown/houdini_html_e.c',
'src/sundown/autolink.c'
])],
classifiers = [
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Programming Language :: C',
'Programming Language :: Cython',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.4',
'Topic :: Text Processing :: Markup',
'Topic :: Text Processing :: Markup :: HTML',
'Topic :: Utilities'
]
],
setup_requires=['cffi>=1.0.0'],
install_requires=['cffi>=1.0.0'],
cffi_modules=['build_ffi.py:ffi'],
)

21
src/extra.c Normal file
View File

@ -0,0 +1,21 @@
#include "extra.h"
#include <string.h>
#include "hoedown/buffer.h"
#include "hoedown/document.h"
hoedown_renderer *null_renderer_new()
{
hoedown_renderer *renderer;
renderer = hoedown_malloc(sizeof(hoedown_renderer));
memset(renderer, 0x0, sizeof(hoedown_renderer));
return renderer;
}
void null_renderer_free(hoedown_renderer *renderer)
{
free(renderer);
}

19
src/extra.h Normal file
View File

@ -0,0 +1,19 @@
/* extra.h - Helper functions */
#ifndef MICHIKO_EXTRA_H
#define MICHIKO_EXTRA_H
#include "hoedown/document.h"
#ifdef __cplusplus
extern "C" {
#endif
hoedown_renderer *null_renderer_new(void);
void null_renderer_free(hoedown_renderer *renderer);
#ifdef __cplusplus
}
#endif
#endif /** MICHIKO_EXTRA_H **/

View File

@ -1,20 +1,3 @@
/*
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "buffer.h"
#include "autolink.h"
#include <string.h>
@ -22,26 +5,28 @@
#include <stdio.h>
#include <ctype.h>
#if defined(_WIN32)
#ifndef _MSC_VER
#include <strings.h>
#else
#define strncasecmp _strnicmp
#endif
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
hoedown_autolink_is_safe(const uint8_t *data, size_t size)
{
static const size_t valid_uris_count = 5;
static const size_t valid_uris_count = 6;
static const char *valid_uris[] = {
"/", "http://", "https://", "ftp://", "mailto:"
"http://", "https://", "/", "#", "ftp://", "mailto:"
};
static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 };
size_t i;
for (i = 0; i < valid_uris_count; ++i) {
size_t len = strlen(valid_uris[i]);
size_t len = valid_uris_size[i];
if (link_len > len &&
strncasecmp((char *)link, valid_uris[i], len) == 0 &&
isalnum(link[len]))
if (size > len &&
strncasecmp((char *)data, valid_uris[i], len) == 0 &&
isalnum(data[len]))
return 1;
}
@ -61,7 +46,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
}
while (link_end > 0) {
if (strchr("?!.,", data[link_end - 1]) != NULL)
if (strchr("?!.,:", data[link_end - 1]) != NULL)
link_end--;
else if (data[link_end - 1] == ';') {
@ -141,7 +126,7 @@ check_domain(uint8_t *data, size_t size, int allow_short)
return 0;
for (i = 1; i < size - 1; ++i) {
if (data[i] == '.') np++;
if (strchr(".:", data[i]) != NULL) np++;
else if (!isalnum(data[i]) && data[i] != '-') break;
}
@ -159,9 +144,9 @@ check_domain(uint8_t *data, size_t size, int allow_short)
}
size_t
sd_autolink__www(
hoedown_autolink__www(
size_t *rewind_p,
struct buf *link,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
@ -188,16 +173,16 @@ sd_autolink__www(
if (link_end == 0)
return 0;
bufput(link, data, link_end);
hoedown_buffer_put(link, data, link_end);
*rewind_p = 0;
return (int)link_end;
}
size_t
sd_autolink__email(
hoedown_autolink__email(
size_t *rewind_p,
struct buf *link,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
@ -207,7 +192,7 @@ sd_autolink__email(
int nb = 0, np = 0;
for (rewind = 0; rewind < max_rewind; ++rewind) {
uint8_t c = data[-rewind - 1];
uint8_t c = data[-1 - rewind];
if (isalnum(c))
continue;
@ -244,16 +229,16 @@ sd_autolink__email(
if (link_end == 0)
return 0;
bufput(link, data - rewind, link_end + rewind);
hoedown_buffer_put(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
return link_end;
}
size_t
sd_autolink__url(
hoedown_autolink__url(
size_t *rewind_p,
struct buf *link,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
@ -264,10 +249,10 @@ sd_autolink__url(
if (size < 4 || data[1] != '/' || data[2] != '/')
return 0;
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
while (rewind < max_rewind && isalpha(data[-1 - rewind]))
rewind++;
if (!sd_autolink_issafe(data - rewind, size + rewind))
if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
return 0;
link_end = strlen("://");
@ -275,7 +260,7 @@ sd_autolink__url(
domain_len = check_domain(
data + link_end,
size - link_end,
flags & SD_AUTOLINK_SHORT_DOMAINS);
flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);
if (domain_len == 0)
return 0;
@ -289,9 +274,8 @@ sd_autolink__url(
if (link_end == 0)
return 0;
bufput(link, data - rewind, link_end + rewind);
hoedown_buffer_put(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
return link_end;
}

46
src/hoedown/autolink.h Normal file
View File

@ -0,0 +1,46 @@
/* autolink.h - versatile autolinker */
#ifndef HOEDOWN_AUTOLINK_H
#define HOEDOWN_AUTOLINK_H
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
typedef enum hoedown_autolink_flags {
HOEDOWN_AUTOLINK_SHORT_DOMAINS = (1 << 0)
} hoedown_autolink_flags;
/*************
* FUNCTIONS *
*************/
/* hoedown_autolink_is_safe: verify that a URL has a safe protocol */
int hoedown_autolink_is_safe(const uint8_t *data, size_t size);
/* hoedown_autolink__www: search for the next www link in data */
size_t hoedown_autolink__www(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
/* hoedown_autolink__email: search for the next email in data */
size_t hoedown_autolink__email(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
/* hoedown_autolink__url: search for the next URL in data */
size_t hoedown_autolink__url(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_AUTOLINK_H **/

308
src/hoedown/buffer.c Normal file
View File

@ -0,0 +1,308 @@
#include "buffer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
void *
hoedown_malloc(size_t size)
{
void *ret = malloc(size);
if (!ret) {
fprintf(stderr, "Allocation failed.\n");
abort();
}
return ret;
}
void *
hoedown_calloc(size_t nmemb, size_t size)
{
void *ret = calloc(nmemb, size);
if (!ret) {
fprintf(stderr, "Allocation failed.\n");
abort();
}
return ret;
}
void *
hoedown_realloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
if (!ret) {
fprintf(stderr, "Allocation failed.\n");
abort();
}
return ret;
}
void
hoedown_buffer_init(
hoedown_buffer *buf,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free,
hoedown_free_callback buffer_free)
{
assert(buf);
buf->data = NULL;
buf->size = buf->asize = 0;
buf->unit = unit;
buf->data_realloc = data_realloc;
buf->data_free = data_free;
buf->buffer_free = buffer_free;
}
void
hoedown_buffer_uninit(hoedown_buffer *buf)
{
assert(buf && buf->unit);
buf->data_free(buf->data);
}
hoedown_buffer *
hoedown_buffer_new(size_t unit)
{
hoedown_buffer *ret = hoedown_malloc(sizeof (hoedown_buffer));
hoedown_buffer_init(ret, unit, hoedown_realloc, free, free);
return ret;
}
void
hoedown_buffer_free(hoedown_buffer *buf)
{
if (!buf) return;
assert(buf && buf->unit);
buf->data_free(buf->data);
if (buf->buffer_free)
buf->buffer_free(buf);
}
void
hoedown_buffer_reset(hoedown_buffer *buf)
{
assert(buf && buf->unit);
buf->data_free(buf->data);
buf->data = NULL;
buf->size = buf->asize = 0;
}
void
hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz)
{
size_t neoasz;
assert(buf && buf->unit);
if (buf->asize >= neosz)
return;
neoasz = buf->asize + buf->unit;
while (neoasz < neosz)
neoasz += buf->unit;
buf->data = buf->data_realloc(buf->data, neoasz);
buf->asize = neoasz;
}
void
hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size)
{
assert(buf && buf->unit);
if (buf->size + size > buf->asize)
hoedown_buffer_grow(buf, buf->size + size);
memcpy(buf->data + buf->size, data, size);
buf->size += size;
}
void
hoedown_buffer_puts(hoedown_buffer *buf, const char *str)
{
hoedown_buffer_put(buf, (const uint8_t *)str, strlen(str));
}
void
hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c)
{
assert(buf && buf->unit);
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
buf->data[buf->size] = c;
buf->size += 1;
}
int
hoedown_buffer_putf(hoedown_buffer *buf, FILE *file)
{
assert(buf && buf->unit);
while (!(feof(file) || ferror(file))) {
hoedown_buffer_grow(buf, buf->size + buf->unit);
buf->size += fread(buf->data + buf->size, 1, buf->unit, file);
}
return ferror(file);
}
void
hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size)
{
assert(buf && buf->unit);
if (size > buf->asize)
hoedown_buffer_grow(buf, size);
memcpy(buf->data, data, size);
buf->size = size;
}
void
hoedown_buffer_sets(hoedown_buffer *buf, const char *str)
{
hoedown_buffer_set(buf, (const uint8_t *)str, strlen(str));
}
int
hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size)
{
if (buf->size != size) return 0;
return memcmp(buf->data, data, size) == 0;
}
int
hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str)
{
return hoedown_buffer_eq(buf, (const uint8_t *)str, strlen(str));
}
int
hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix)
{
size_t i;
for (i = 0; i < buf->size; ++i) {
if (prefix[i] == 0)
return 0;
if (buf->data[i] != prefix[i])
return buf->data[i] - prefix[i];
}
return 0;
}
void
hoedown_buffer_slurp(hoedown_buffer *buf, size_t size)
{
assert(buf && buf->unit);
if (size >= buf->size) {
buf->size = 0;
return;
}
buf->size -= size;
memmove(buf->data, buf->data + size, buf->size);
}
const char *
hoedown_buffer_cstr(hoedown_buffer *buf)
{
assert(buf && buf->unit);
if (buf->size < buf->asize && buf->data[buf->size] == 0)
return (char *)buf->data;
hoedown_buffer_grow(buf, buf->size + 1);
buf->data[buf->size] = 0;
return (char *)buf->data;
}
void
hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...)
{
va_list ap;
int n;
assert(buf && buf->unit);
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
if (n < 0) {
#ifndef _MSC_VER
return;
#else
va_start(ap, fmt);
n = _vscprintf(fmt, ap);
va_end(ap);
#endif
}
if ((size_t)n >= buf->asize - buf->size) {
hoedown_buffer_grow(buf, buf->size + n + 1);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
}
if (n < 0)
return;
buf->size += n;
}
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int c) {
unsigned char unichar[4];
assert(buf && buf->unit);
if (c < 0x80) {
hoedown_buffer_putc(buf, c);
}
else if (c < 0x800) {
unichar[0] = 192 + (c / 64);
unichar[1] = 128 + (c % 64);
hoedown_buffer_put(buf, unichar, 2);
}
else if (c - 0xd800u < 0x800) {
HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd");
}
else if (c < 0x10000) {
unichar[0] = 224 + (c / 4096);
unichar[1] = 128 + (c / 64) % 64;
unichar[2] = 128 + (c % 64);
hoedown_buffer_put(buf, unichar, 3);
}
else if (c < 0x110000) {
unichar[0] = 240 + (c / 262144);
unichar[1] = 128 + (c / 4096) % 64;
unichar[2] = 128 + (c / 64) % 64;
unichar[3] = 128 + (c % 64);
hoedown_buffer_put(buf, unichar, 4);
}
else {
HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd");
}
}

134
src/hoedown/buffer.h Normal file
View File

@ -0,0 +1,134 @@
/* buffer.h - simple, fast buffers */
#ifndef HOEDOWN_BUFFER_H
#define HOEDOWN_BUFFER_H
#include <stdio.h>
#include <stddef.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_MSC_VER)
#define __attribute__(x)
#define inline __inline
#define __builtin_expect(x,n) x
#endif
/*********
* TYPES *
*********/
typedef void *(*hoedown_realloc_callback)(void *, size_t);
typedef void (*hoedown_free_callback)(void *);
struct hoedown_buffer {
uint8_t *data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
hoedown_realloc_callback data_realloc;
hoedown_free_callback data_free;
hoedown_free_callback buffer_free;
};
typedef struct hoedown_buffer hoedown_buffer;
/*************
* FUNCTIONS *
*************/
/* allocation wrappers */
void *hoedown_malloc(size_t size) __attribute__ ((malloc));
void *hoedown_calloc(size_t nmemb, size_t size) __attribute__ ((malloc));
void *hoedown_realloc(void *ptr, size_t size) __attribute__ ((malloc));
/* hoedown_buffer_init: initialize a buffer with custom allocators */
void hoedown_buffer_init(
hoedown_buffer *buffer,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free,
hoedown_free_callback buffer_free
);
/* hoedown_buffer_uninit: uninitialize an existing buffer */
void hoedown_buffer_uninit(hoedown_buffer *buf);
/* hoedown_buffer_new: allocate a new buffer */
hoedown_buffer *hoedown_buffer_new(size_t unit) __attribute__ ((malloc));
/* hoedown_buffer_reset: free internal data of the buffer */
void hoedown_buffer_reset(hoedown_buffer *buf);
/* hoedown_buffer_grow: increase the allocated size to the given value */
void hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz);
/* hoedown_buffer_put: append raw data to a buffer */
void hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size);
/* hoedown_buffer_puts: append a NUL-terminated string to a buffer */
void hoedown_buffer_puts(hoedown_buffer *buf, const char *str);
/* hoedown_buffer_putc: append a single char to a buffer */
void hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c);
/* hoedown_buffer_putf: read from a file and append to a buffer, until EOF or error */
int hoedown_buffer_putf(hoedown_buffer *buf, FILE* file);
/* hoedown_buffer_set: replace the buffer's contents with raw data */
void hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size);
/* hoedown_buffer_sets: replace the buffer's contents with a NUL-terminated string */
void hoedown_buffer_sets(hoedown_buffer *buf, const char *str);
/* hoedown_buffer_eq: compare a buffer's data with other data for equality */
int hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size);
/* hoedown_buffer_eq: compare a buffer's data with NUL-terminated string for equality */
int hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str);
/* hoedown_buffer_prefix: compare the beginning of a buffer with a string */
int hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix);
/* hoedown_buffer_slurp: remove a given number of bytes from the head of the buffer */
void hoedown_buffer_slurp(hoedown_buffer *buf, size_t size);
/* hoedown_buffer_cstr: NUL-termination of the string array (making a C-string) */
const char *hoedown_buffer_cstr(hoedown_buffer *buf);
/* hoedown_buffer_printf: formatted printing to a buffer */
void hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
/* hoedown_buffer_put_utf8: put a Unicode character encoded as UTF-8 */
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int codepoint);
/* hoedown_buffer_free: free the buffer */
void hoedown_buffer_free(hoedown_buffer *buf);
/* HOEDOWN_BUFPUTSL: optimized hoedown_buffer_puts of a string literal */
#define HOEDOWN_BUFPUTSL(output, literal) \
hoedown_buffer_put(output, (const uint8_t *)literal, sizeof(literal) - 1)
/* HOEDOWN_BUFSETSL: optimized hoedown_buffer_sets of a string literal */
#define HOEDOWN_BUFSETSL(output, literal) \
hoedown_buffer_set(output, (const uint8_t *)literal, sizeof(literal) - 1)
/* HOEDOWN_BUFEQSL: optimized hoedown_buffer_eqs of a string literal */
#define HOEDOWN_BUFEQSL(output, literal) \
hoedown_buffer_eq(output, (const uint8_t *)literal, sizeof(literal) - 1)
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_BUFFER_H **/

2966
src/hoedown/document.c Normal file

File diff suppressed because it is too large Load Diff

172
src/hoedown/document.h Normal file
View File

@ -0,0 +1,172 @@
/* document.h - generic markdown parser */
#ifndef HOEDOWN_DOCUMENT_H
#define HOEDOWN_DOCUMENT_H
#include "buffer.h"
#include "autolink.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
typedef enum hoedown_extensions {
/* block-level extensions */
HOEDOWN_EXT_TABLES = (1 << 0),
HOEDOWN_EXT_FENCED_CODE = (1 << 1),
HOEDOWN_EXT_FOOTNOTES = (1 << 2),
/* span-level extensions */
HOEDOWN_EXT_AUTOLINK = (1 << 3),
HOEDOWN_EXT_STRIKETHROUGH = (1 << 4),
HOEDOWN_EXT_UNDERLINE = (1 << 5),
HOEDOWN_EXT_HIGHLIGHT = (1 << 6),
HOEDOWN_EXT_QUOTE = (1 << 7),
HOEDOWN_EXT_SUPERSCRIPT = (1 << 8),
HOEDOWN_EXT_MATH = (1 << 9),
/* other flags */
HOEDOWN_EXT_NO_INTRA_EMPHASIS = (1 << 11),
HOEDOWN_EXT_SPACE_HEADERS = (1 << 12),
HOEDOWN_EXT_MATH_EXPLICIT = (1 << 13),
/* negative flags */
HOEDOWN_EXT_DISABLE_INDENTED_CODE = (1 << 14)
} hoedown_extensions;
#define HOEDOWN_EXT_BLOCK (\
HOEDOWN_EXT_TABLES |\
HOEDOWN_EXT_FENCED_CODE |\
HOEDOWN_EXT_FOOTNOTES )
#define HOEDOWN_EXT_SPAN (\
HOEDOWN_EXT_AUTOLINK |\
HOEDOWN_EXT_STRIKETHROUGH |\
HOEDOWN_EXT_UNDERLINE |\
HOEDOWN_EXT_HIGHLIGHT |\
HOEDOWN_EXT_QUOTE |\
HOEDOWN_EXT_SUPERSCRIPT |\
HOEDOWN_EXT_MATH )
#define HOEDOWN_EXT_FLAGS (\
HOEDOWN_EXT_NO_INTRA_EMPHASIS |\
HOEDOWN_EXT_SPACE_HEADERS |\
HOEDOWN_EXT_MATH_EXPLICIT )
#define HOEDOWN_EXT_NEGATIVE (\
HOEDOWN_EXT_DISABLE_INDENTED_CODE )
typedef enum hoedown_list_flags {
HOEDOWN_LIST_ORDERED = (1 << 0),
HOEDOWN_LI_BLOCK = (1 << 1) /* <li> containing block data */
} hoedown_list_flags;
typedef enum hoedown_table_flags {
HOEDOWN_TABLE_ALIGN_LEFT = 1,
HOEDOWN_TABLE_ALIGN_RIGHT = 2,
HOEDOWN_TABLE_ALIGN_CENTER = 3,
HOEDOWN_TABLE_ALIGNMASK = 3,
HOEDOWN_TABLE_HEADER = 4
} hoedown_table_flags;
typedef enum hoedown_autolink_type {
HOEDOWN_AUTOLINK_NONE, /* used internally when it is not an autolink*/
HOEDOWN_AUTOLINK_NORMAL, /* normal http/http/ftp/mailto/etc link */
HOEDOWN_AUTOLINK_EMAIL /* e-mail link without explit mailto: */
} hoedown_autolink_type;
/*********
* TYPES *
*********/
struct hoedown_document;
typedef struct hoedown_document hoedown_document;
struct hoedown_renderer_data {
void *opaque;
};
typedef struct hoedown_renderer_data hoedown_renderer_data;
/* hoedown_renderer - functions for rendering parsed data */
struct hoedown_renderer {
/* state object */
void *opaque;
/* block level callbacks - NULL skips the block */
void (*blockcode)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data);
void (*blockquote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*header)(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data);
void (*hrule)(hoedown_buffer *ob, const hoedown_renderer_data *data);
void (*list)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*listitem)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*paragraph)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_header)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_body)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_row)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_cell)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data);
void (*footnotes)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*footnote_def)(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data);
void (*blockhtml)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* span level callbacks - NULL or return 0 prints the span verbatim */
int (*autolink)(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data);
int (*codespan)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
int (*double_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*underline)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*highlight)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*quote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*image)(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data);
int (*linebreak)(hoedown_buffer *ob, const hoedown_renderer_data *data);
int (*link)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data);
int (*triple_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*strikethrough)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*superscript)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*footnote_ref)(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data);
int (*math)(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data);
int (*raw_html)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* low level callbacks - NULL copies input directly into the output */
void (*entity)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
void (*normal_text)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* miscellaneous callbacks */
void (*doc_header)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
void (*doc_footer)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
};
typedef struct hoedown_renderer hoedown_renderer;
/*************
* FUNCTIONS *
*************/
/* hoedown_document_new: allocate a new document processor instance */
hoedown_document *hoedown_document_new(
const hoedown_renderer *renderer,
hoedown_extensions extensions,
size_t max_nesting
) __attribute__ ((malloc));
/* hoedown_document_render: render regular Markdown using the document processor */
void hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_render_inline: render inline Markdown using the document processor */
void hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_free: deallocate a document processor instance */
void hoedown_document_free(hoedown_document *doc);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_DOCUMENT_H **/

188
src/hoedown/escape.c Normal file
View File

@ -0,0 +1,188 @@
#include "escape.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
/*
* The following characters will not be escaped:
*
* -_.+!*'(),%#@?=;:/,+&$ alphanum
*
* Note that this character set is the addition of:
*
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
* We assume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
* have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
*
* There are two exceptions: the chacters & (amp)
* and ' (single quote) do not appear in the table.
* They are meant to appear in the URL as components,
* yet they require special HTML-entity escaping
* to generate valid HTML markup.
*
* All other characters will be escaped to %XX.
*
*/
static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
void
hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
{
static const char hex_chars[] = "0123456789ABCDEF";
size_t i = 0, mark;
char hex_str[3];
hex_str[0] = '%';
while (i < size) {
mark = i;
while (i < size && HREF_SAFE[data[i]]) i++;
/* Optimization for cases where there's nothing to escape */
if (mark == 0 && i >= size) {
hoedown_buffer_put(ob, data, size);
return;
}
if (likely(i > mark)) {
hoedown_buffer_put(ob, data + mark, i - mark);
}
/* escaping */
if (i >= size)
break;
switch (data[i]) {
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
HOEDOWN_BUFPUTSL(ob, "&amp;");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
HOEDOWN_BUFPUTSL(ob, "&#x27;");
break;
/* the space can be escaped to %20 or a plus
* sign. we're going with the generic escape
* for now. the plus thing is more commonly seen
* when building GET strings */
#if 0
case ' ':
hoedown_buffer_putc(ob, '+');
break;
#endif
/* every other character goes with a %XX escaping */
default:
hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
hex_str[2] = hex_chars[data[i] & 0xF];
hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
}
i++;
}
}
/**
* According to the OWASP rules:
*
* & --> &amp;
* < --> &lt;
* > --> &gt;
* " --> &quot;
* ' --> &#x27; &apos; is not recommended
* / --> &#x2F; forward slash is included as it helps end an HTML entity
*
*/
static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const char *HTML_ESCAPES[] = {
"",
"&quot;",
"&amp;",
"&#39;",
"&#47;",
"&lt;",
"&gt;"
};
void
hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
{
size_t i = 0, mark;
while (1) {
mark = i;
while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
/* Optimization for cases where there's nothing to escape */
if (mark == 0 && i >= size) {
hoedown_buffer_put(ob, data, size);
return;
}
if (likely(i > mark))
hoedown_buffer_put(ob, data + mark, i - mark);
if (i >= size) break;
/* The forward slash is only escaped in secure mode */
if (!secure && data[i] == '/') {
hoedown_buffer_putc(ob, '/');
} else {
hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
}
i++;
}
}

28
src/hoedown/escape.h Normal file
View File

@ -0,0 +1,28 @@
/* escape.h - escape utilities */
#ifndef HOEDOWN_ESCAPE_H
#define HOEDOWN_ESCAPE_H
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* FUNCTIONS *
*************/
/* hoedown_escape_href: escape (part of) a URL inside HTML */
void hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_escape_html: escape HTML */
void hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_ESCAPE_H **/

754
src/hoedown/html.c Normal file
View File

@ -0,0 +1,754 @@
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "escape.h"
#define USE_XHTML(opt) (opt->flags & HOEDOWN_HTML_USE_XHTML)
hoedown_html_tag
hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname)
{
size_t i;
int closed = 0;
if (size < 3 || data[0] != '<')
return HOEDOWN_HTML_TAG_NONE;
i = 1;
if (data[i] == '/') {
closed = 1;
i++;
}
for (; i < size; ++i, ++tagname) {
if (*tagname == 0)
break;
if (data[i] != *tagname)
return HOEDOWN_HTML_TAG_NONE;
}
if (i == size)
return HOEDOWN_HTML_TAG_NONE;
if (isspace(data[i]) || data[i] == '>')
return closed ? HOEDOWN_HTML_TAG_CLOSE : HOEDOWN_HTML_TAG_OPEN;
return HOEDOWN_HTML_TAG_NONE;
}
static void escape_html(hoedown_buffer *ob, const uint8_t *source, size_t length)
{
hoedown_escape_html(ob, source, length, 0);
}
static void escape_href(hoedown_buffer *ob, const uint8_t *source, size_t length)
{
hoedown_escape_href(ob, source, length);
}
/********************
* GENERIC RENDERER *
********************/
static int
rndr_autolink(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
if (!link || !link->size)
return 0;
HOEDOWN_BUFPUTSL(ob, "<a href=\"");
if (type == HOEDOWN_AUTOLINK_EMAIL)
HOEDOWN_BUFPUTSL(ob, "mailto:");
escape_href(ob, link->data, link->size);
if (state->link_attributes) {
hoedown_buffer_putc(ob, '\"');
state->link_attributes(ob, link, data);
hoedown_buffer_putc(ob, '>');
} else {
HOEDOWN_BUFPUTSL(ob, "\">");
}
/*
* Pretty printing: if we get an email address as
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
* want to print the `mailto:` prefix
*/
if (hoedown_buffer_prefix(link, "mailto:") == 0) {
escape_html(ob, link->data + 7, link->size - 7);
} else {
escape_html(ob, link->data, link->size);
}
HOEDOWN_BUFPUTSL(ob, "</a>");
return 1;
}
static void
rndr_blockcode(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data)
{
if (ob->size) hoedown_buffer_putc(ob, '\n');
if (lang) {
HOEDOWN_BUFPUTSL(ob, "<pre><code class=\"language-");
escape_html(ob, lang->data, lang->size);
HOEDOWN_BUFPUTSL(ob, "\">");
} else {
HOEDOWN_BUFPUTSL(ob, "<pre><code>");
}
if (text)
escape_html(ob, text->data, text->size);
HOEDOWN_BUFPUTSL(ob, "</code></pre>\n");
}
static void
rndr_blockquote(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (ob->size) hoedown_buffer_putc(ob, '\n');
HOEDOWN_BUFPUTSL(ob, "<blockquote>\n");
if (content) hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</blockquote>\n");
}
static int
rndr_codespan(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
{
HOEDOWN_BUFPUTSL(ob, "<code>");
if (text) escape_html(ob, text->data, text->size);
HOEDOWN_BUFPUTSL(ob, "</code>");
return 1;
}
static int
rndr_strikethrough(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size)
return 0;
HOEDOWN_BUFPUTSL(ob, "<del>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</del>");
return 1;
}
static int
rndr_double_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size)
return 0;
HOEDOWN_BUFPUTSL(ob, "<strong>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</strong>");
return 1;
}
static int
rndr_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size) return 0;
HOEDOWN_BUFPUTSL(ob, "<em>");
if (content) hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</em>");
return 1;
}
static int
rndr_underline(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size)
return 0;
HOEDOWN_BUFPUTSL(ob, "<u>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</u>");
return 1;
}
static int
rndr_highlight(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size)
return 0;
HOEDOWN_BUFPUTSL(ob, "<mark>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</mark>");
return 1;
}
static int
rndr_quote(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size)
return 0;
HOEDOWN_BUFPUTSL(ob, "<q>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</q>");
return 1;
}
static int
rndr_linebreak(hoedown_buffer *ob, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
hoedown_buffer_puts(ob, USE_XHTML(state) ? "<br/>\n" : "<br>\n");
return 1;
}
static void
rndr_header(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
if (ob->size)
hoedown_buffer_putc(ob, '\n');
if (level <= state->toc_data.nesting_level)
hoedown_buffer_printf(ob, "<h%d id=\"toc_%d\">", level, state->toc_data.header_count++);
else
hoedown_buffer_printf(ob, "<h%d>", level);
if (content) hoedown_buffer_put(ob, content->data, content->size);
hoedown_buffer_printf(ob, "</h%d>\n", level);
}
static int
rndr_link(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
HOEDOWN_BUFPUTSL(ob, "<a href=\"");
if (link && link->size)
escape_href(ob, link->data, link->size);
if (title && title->size) {
HOEDOWN_BUFPUTSL(ob, "\" title=\"");
escape_html(ob, title->data, title->size);
}
if (state->link_attributes) {
hoedown_buffer_putc(ob, '\"');
state->link_attributes(ob, link, data);
hoedown_buffer_putc(ob, '>');
} else {
HOEDOWN_BUFPUTSL(ob, "\">");
}
if (content && content->size) hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</a>");
return 1;
}
static void
rndr_list(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)
{
if (ob->size) hoedown_buffer_putc(ob, '\n');
hoedown_buffer_put(ob, (const uint8_t *)(flags & HOEDOWN_LIST_ORDERED ? "<ol>\n" : "<ul>\n"), 5);
if (content) hoedown_buffer_put(ob, content->data, content->size);
hoedown_buffer_put(ob, (const uint8_t *)(flags & HOEDOWN_LIST_ORDERED ? "</ol>\n" : "</ul>\n"), 6);
}
static void
rndr_listitem(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)
{
HOEDOWN_BUFPUTSL(ob, "<li>");
if (content) {
size_t size = content->size;
while (size && content->data[size - 1] == '\n')
size--;
hoedown_buffer_put(ob, content->data, size);
}
HOEDOWN_BUFPUTSL(ob, "</li>\n");
}
static void
rndr_paragraph(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
size_t i = 0;
if (ob->size) hoedown_buffer_putc(ob, '\n');
if (!content || !content->size)
return;
while (i < content->size && isspace(content->data[i])) i++;
if (i == content->size)
return;
HOEDOWN_BUFPUTSL(ob, "<p>");
if (state->flags & HOEDOWN_HTML_HARD_WRAP) {
size_t org;
while (i < content->size) {
org = i;
while (i < content->size && content->data[i] != '\n')
i++;
if (i > org)
hoedown_buffer_put(ob, content->data + org, i - org);
/*
* do not insert a line break if this newline
* is the last character on the paragraph
*/
if (i >= content->size - 1)
break;
rndr_linebreak(ob, data);
i++;
}
} else {
hoedown_buffer_put(ob, content->data + i, content->size - i);
}
HOEDOWN_BUFPUTSL(ob, "</p>\n");
}
static void
rndr_raw_block(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
{
size_t org, sz;
if (!text)
return;
/* FIXME: Do we *really* need to trim the HTML? How does that make a difference? */
sz = text->size;
while (sz > 0 && text->data[sz - 1] == '\n')
sz--;
org = 0;
while (org < sz && text->data[org] == '\n')
org++;
if (org >= sz)
return;
if (ob->size)
hoedown_buffer_putc(ob, '\n');
hoedown_buffer_put(ob, text->data + org, sz - org);
hoedown_buffer_putc(ob, '\n');
}
static int
rndr_triple_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size) return 0;
HOEDOWN_BUFPUTSL(ob, "<strong><em>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</em></strong>");
return 1;
}
static void
rndr_hrule(hoedown_buffer *ob, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
if (ob->size) hoedown_buffer_putc(ob, '\n');
hoedown_buffer_puts(ob, USE_XHTML(state) ? "<hr/>\n" : "<hr>\n");
}
static int
rndr_image(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
if (!link || !link->size) return 0;
HOEDOWN_BUFPUTSL(ob, "<img src=\"");
escape_href(ob, link->data, link->size);
HOEDOWN_BUFPUTSL(ob, "\" alt=\"");
if (alt && alt->size)
escape_html(ob, alt->data, alt->size);
if (title && title->size) {
HOEDOWN_BUFPUTSL(ob, "\" title=\"");
escape_html(ob, title->data, title->size); }
hoedown_buffer_puts(ob, USE_XHTML(state) ? "\"/>" : "\">");
return 1;
}
static int
rndr_raw_html(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
/* ESCAPE overrides SKIP_HTML. It doesn't look to see if
* there are any valid tags, just escapes all of them. */
if((state->flags & HOEDOWN_HTML_ESCAPE) != 0) {
escape_html(ob, text->data, text->size);
return 1;
}
if ((state->flags & HOEDOWN_HTML_SKIP_HTML) != 0)
return 1;
hoedown_buffer_put(ob, text->data, text->size);
return 1;
}
static void
rndr_table(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (ob->size) hoedown_buffer_putc(ob, '\n');
HOEDOWN_BUFPUTSL(ob, "<table>\n");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</table>\n");
}
static void
rndr_table_header(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (ob->size) hoedown_buffer_putc(ob, '\n');
HOEDOWN_BUFPUTSL(ob, "<thead>\n");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</thead>\n");
}
static void
rndr_table_body(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (ob->size) hoedown_buffer_putc(ob, '\n');
HOEDOWN_BUFPUTSL(ob, "<tbody>\n");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</tbody>\n");
}
static void
rndr_tablerow(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
HOEDOWN_BUFPUTSL(ob, "<tr>\n");
if (content) hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</tr>\n");
}
static void
rndr_tablecell(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data)
{
if (flags & HOEDOWN_TABLE_HEADER) {
HOEDOWN_BUFPUTSL(ob, "<th");
} else {
HOEDOWN_BUFPUTSL(ob, "<td");
}
switch (flags & HOEDOWN_TABLE_ALIGNMASK) {
case HOEDOWN_TABLE_ALIGN_CENTER:
HOEDOWN_BUFPUTSL(ob, " style=\"text-align: center\">");
break;
case HOEDOWN_TABLE_ALIGN_LEFT:
HOEDOWN_BUFPUTSL(ob, " style=\"text-align: left\">");
break;
case HOEDOWN_TABLE_ALIGN_RIGHT:
HOEDOWN_BUFPUTSL(ob, " style=\"text-align: right\">");
break;
default:
HOEDOWN_BUFPUTSL(ob, ">");
}
if (content)
hoedown_buffer_put(ob, content->data, content->size);
if (flags & HOEDOWN_TABLE_HEADER) {
HOEDOWN_BUFPUTSL(ob, "</th>\n");
} else {
HOEDOWN_BUFPUTSL(ob, "</td>\n");
}
}
static int
rndr_superscript(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (!content || !content->size) return 0;
HOEDOWN_BUFPUTSL(ob, "<sup>");
hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</sup>");
return 1;
}
static void
rndr_normal_text(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
if (content)
escape_html(ob, content->data, content->size);
}
static void
rndr_footnotes(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
if (ob->size) hoedown_buffer_putc(ob, '\n');
HOEDOWN_BUFPUTSL(ob, "<div class=\"footnotes\">\n");
hoedown_buffer_puts(ob, USE_XHTML(state) ? "<hr/>\n" : "<hr>\n");
HOEDOWN_BUFPUTSL(ob, "<ol>\n");
if (content) hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "\n</ol>\n</div>\n");
}
static void
rndr_footnote_def(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data)
{
size_t i = 0;
int pfound = 0;
/* insert anchor at the end of first paragraph block */
if (content) {
while ((i+3) < content->size) {
if (content->data[i++] != '<') continue;
if (content->data[i++] != '/') continue;
if (content->data[i++] != 'p' && content->data[i] != 'P') continue;
if (content->data[i] != '>') continue;
i -= 3;
pfound = 1;
break;
}
}
hoedown_buffer_printf(ob, "\n<li id=\"fn%d\">\n", num);
if (pfound) {
hoedown_buffer_put(ob, content->data, i);
hoedown_buffer_printf(ob, "&nbsp;<a href=\"#fnref%d\" rev=\"footnote\">&#8617;</a>", num);
hoedown_buffer_put(ob, content->data + i, content->size - i);
} else if (content) {
hoedown_buffer_put(ob, content->data, content->size);
}
HOEDOWN_BUFPUTSL(ob, "</li>\n");
}
static int
rndr_footnote_ref(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data)
{
hoedown_buffer_printf(ob, "<sup id=\"fnref%d\"><a href=\"#fn%d\" rel=\"footnote\">%d</a></sup>", num, num, num);
return 1;
}
static int
rndr_math(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data)
{
hoedown_buffer_put(ob, (const uint8_t *)(displaymode ? "\\[" : "\\("), 2);
escape_html(ob, text->data, text->size);
hoedown_buffer_put(ob, (const uint8_t *)(displaymode ? "\\]" : "\\)"), 2);
return 1;
}
static void
toc_header(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state = data->opaque;
if (level <= state->toc_data.nesting_level) {
/* set the level offset if this is the first header
* we're parsing for the document */
if (state->toc_data.current_level == 0)
state->toc_data.level_offset = level - 1;
level -= state->toc_data.level_offset;
if (level > state->toc_data.current_level) {
while (level > state->toc_data.current_level) {
HOEDOWN_BUFPUTSL(ob, "<ul>\n<li>\n");
state->toc_data.current_level++;
}
} else if (level < state->toc_data.current_level) {
HOEDOWN_BUFPUTSL(ob, "</li>\n");
while (level < state->toc_data.current_level) {
HOEDOWN_BUFPUTSL(ob, "</ul>\n</li>\n");
state->toc_data.current_level--;
}
HOEDOWN_BUFPUTSL(ob,"<li>\n");
} else {
HOEDOWN_BUFPUTSL(ob,"</li>\n<li>\n");
}
hoedown_buffer_printf(ob, "<a href=\"#toc_%d\">", state->toc_data.header_count++);
if (content) hoedown_buffer_put(ob, content->data, content->size);
HOEDOWN_BUFPUTSL(ob, "</a>\n");
}
}
static int
toc_link(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)
{
if (content && content->size) hoedown_buffer_put(ob, content->data, content->size);
return 1;
}
static void
toc_finalize(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data)
{
hoedown_html_renderer_state *state;
if (inline_render)
return;
state = data->opaque;
while (state->toc_data.current_level > 0) {
HOEDOWN_BUFPUTSL(ob, "</li>\n</ul>\n");
state->toc_data.current_level--;
}
state->toc_data.header_count = 0;
}
hoedown_renderer *
hoedown_html_toc_renderer_new(int nesting_level)
{
static const hoedown_renderer cb_default = {
NULL,
NULL,
NULL,
toc_header,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
rndr_underline,
rndr_highlight,
rndr_quote,
NULL,
NULL,
toc_link,
rndr_triple_emphasis,
rndr_strikethrough,
rndr_superscript,
NULL,
NULL,
NULL,
NULL,
rndr_normal_text,
NULL,
toc_finalize
};
hoedown_html_renderer_state *state;
hoedown_renderer *renderer;
/* Prepare the state pointer */
state = hoedown_malloc(sizeof(hoedown_html_renderer_state));
memset(state, 0x0, sizeof(hoedown_html_renderer_state));
state->toc_data.nesting_level = nesting_level;
/* Prepare the renderer */
renderer = hoedown_malloc(sizeof(hoedown_renderer));
memcpy(renderer, &cb_default, sizeof(hoedown_renderer));
renderer->opaque = state;
return renderer;
}
hoedown_renderer *
hoedown_html_renderer_new(hoedown_html_flags render_flags, int nesting_level)
{
static const hoedown_renderer cb_default = {
NULL,
rndr_blockcode,
rndr_blockquote,
rndr_header,
rndr_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
rndr_table,
rndr_table_header,
rndr_table_body,
rndr_tablerow,
rndr_tablecell,
rndr_footnotes,
rndr_footnote_def,
rndr_raw_block,
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
rndr_underline,
rndr_highlight,
rndr_quote,
rndr_image,
rndr_linebreak,
rndr_link,
rndr_triple_emphasis,
rndr_strikethrough,
rndr_superscript,
rndr_footnote_ref,
rndr_math,
rndr_raw_html,
NULL,
rndr_normal_text,
NULL,
NULL
};
hoedown_html_renderer_state *state;
hoedown_renderer *renderer;
/* Prepare the state pointer */
state = hoedown_malloc(sizeof(hoedown_html_renderer_state));
memset(state, 0x0, sizeof(hoedown_html_renderer_state));
state->flags = render_flags;
state->toc_data.nesting_level = nesting_level;
/* Prepare the renderer */
renderer = hoedown_malloc(sizeof(hoedown_renderer));
memcpy(renderer, &cb_default, sizeof(hoedown_renderer));
if (render_flags & HOEDOWN_HTML_SKIP_HTML || render_flags & HOEDOWN_HTML_ESCAPE)
renderer->blockhtml = NULL;
renderer->opaque = state;
return renderer;
}
void
hoedown_html_renderer_free(hoedown_renderer *renderer)
{
free(renderer->opaque);
free(renderer);
}

84
src/hoedown/html.h Normal file
View File

@ -0,0 +1,84 @@
/* html.h - HTML renderer and utilities */
#ifndef HOEDOWN_HTML_H
#define HOEDOWN_HTML_H
#include "document.h"
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
typedef enum hoedown_html_flags {
HOEDOWN_HTML_SKIP_HTML = (1 << 0),
HOEDOWN_HTML_ESCAPE = (1 << 1),
HOEDOWN_HTML_HARD_WRAP = (1 << 2),
HOEDOWN_HTML_USE_XHTML = (1 << 3)
} hoedown_html_flags;
typedef enum hoedown_html_tag {
HOEDOWN_HTML_TAG_NONE = 0,
HOEDOWN_HTML_TAG_OPEN,
HOEDOWN_HTML_TAG_CLOSE
} hoedown_html_tag;
/*********
* TYPES *
*********/
struct hoedown_html_renderer_state {
void *opaque;
struct {
int header_count;
int current_level;
int level_offset;
int nesting_level;
} toc_data;
hoedown_html_flags flags;
/* extra callbacks */
void (*link_attributes)(hoedown_buffer *ob, const hoedown_buffer *url, const hoedown_renderer_data *data);
};
typedef struct hoedown_html_renderer_state hoedown_html_renderer_state;
/*************
* FUNCTIONS *
*************/
/* hoedown_html_smartypants: process an HTML snippet using SmartyPants for smart punctuation */
void hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_html_is_tag: checks if data starts with a specific tag, returns the tag type or NONE */
hoedown_html_tag hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname);
/* hoedown_html_renderer_new: allocates a regular HTML renderer */
hoedown_renderer *hoedown_html_renderer_new(
hoedown_html_flags render_flags,
int nesting_level
) __attribute__ ((malloc));
/* hoedown_html_toc_renderer_new: like hoedown_html_renderer_new, but the returned renderer produces the Table of Contents */
hoedown_renderer *hoedown_html_toc_renderer_new(
int nesting_level
) __attribute__ ((malloc));
/* hoedown_html_renderer_free: deallocate an HTML renderer */
void hoedown_html_renderer_free(hoedown_renderer *renderer);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_HTML_H **/

240
src/hoedown/html_blocks.c Normal file
View File

@ -0,0 +1,240 @@
/* ANSI-C code produced by gperf version 3.0.3 */
/* Command-line: gperf -L ANSI-C -N hoedown_find_block_tag -c -C -E -S 1 --ignore-case -m100 html_block_names.gperf */
/* Computed positions: -k'1-2' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
/* maximum key range = 24, duplicates = 0 */
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
static unsigned char gperf_downcase[256] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255
};
#endif
#ifndef GPERF_CASE_STRNCMP
#define GPERF_CASE_STRNCMP 1
static int
gperf_case_strncmp (register const char *s1, register const char *s2, register unsigned int n)
{
for (; n > 0;)
{
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
if (c1 != 0 && c1 == c2)
{
n--;
continue;
}
return (int)c1 - (int)c2;
}
return 0;
}
#endif
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (register const char *str, register unsigned int len)
{
static const unsigned char asso_values[] =
{
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
22, 21, 19, 18, 16, 0, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 1, 25, 0, 25,
1, 0, 0, 13, 0, 25, 25, 11, 2, 1,
0, 25, 25, 5, 0, 2, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 1, 25,
0, 25, 1, 0, 0, 13, 0, 25, 25, 11,
2, 1, 0, 25, 25, 5, 0, 2, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25
};
register int hval = (int)len;
switch (hval)
{
default:
hval += asso_values[(unsigned char)str[1]+1];
/*FALLTHROUGH*/
case 1:
hval += asso_values[(unsigned char)str[0]];
break;
}
return hval;
}
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
hoedown_find_block_tag (register const char *str, register unsigned int len)
{
enum
{
TOTAL_KEYWORDS = 24,
MIN_WORD_LENGTH = 1,
MAX_WORD_LENGTH = 10,
MIN_HASH_VALUE = 1,
MAX_HASH_VALUE = 24
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
{
register int key = hash (str, len);
if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
{
register const char *resword;
switch (key - 1)
{
case 0:
resword = "p";
goto compare;
case 1:
resword = "h6";
goto compare;
case 2:
resword = "div";
goto compare;
case 3:
resword = "del";
goto compare;
case 4:
resword = "form";
goto compare;
case 5:
resword = "table";
goto compare;
case 6:
resword = "figure";
goto compare;
case 7:
resword = "pre";
goto compare;
case 8:
resword = "fieldset";
goto compare;
case 9:
resword = "noscript";
goto compare;
case 10:
resword = "script";
goto compare;
case 11:
resword = "style";
goto compare;
case 12:
resword = "dl";
goto compare;
case 13:
resword = "ol";
goto compare;
case 14:
resword = "ul";
goto compare;
case 15:
resword = "math";
goto compare;
case 16:
resword = "ins";
goto compare;
case 17:
resword = "h5";
goto compare;
case 18:
resword = "iframe";
goto compare;
case 19:
resword = "h4";
goto compare;
case 20:
resword = "h3";
goto compare;
case 21:
resword = "blockquote";
goto compare;
case 22:
resword = "h2";
goto compare;
case 23:
resword = "h1";
goto compare;
}
return 0;
compare:
if ((((unsigned char)*str ^ (unsigned char)*resword) & ~32) == 0 && !gperf_case_strncmp (str, resword, len) && resword[len] == '\0')
return resword;
}
}
return 0;
}

View File

@ -0,0 +1,435 @@
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
struct smartypants_data {
int in_squote;
int in_dquote;
};
static size_t smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t (*smartypants_cb_ptrs[])
(hoedown_buffer *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
NULL, /* 0 */
smartypants_cb__dash, /* 1 */
smartypants_cb__parens, /* 2 */
smartypants_cb__squote, /* 3 */
smartypants_cb__dquote, /* 4 */
smartypants_cb__amp, /* 5 */
smartypants_cb__period, /* 6 */
smartypants_cb__number, /* 7 */
smartypants_cb__ltag, /* 8 */
smartypants_cb__backtick, /* 9 */
smartypants_cb__escape, /* 10 */
};
static const uint8_t smartypants_cb_chars[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static int
word_boundary(uint8_t c)
{
return c == 0 || isspace(c) || ispunct(c);
}
/*
If 'text' begins with any kind of single quote (e.g. "'" or "&apos;" etc.),
returns the length of the sequence of characters that makes up the single-
quote. Otherwise, returns zero.
*/
static size_t
squote_len(const uint8_t *text, size_t size)
{
static char* single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
char** p;
for (p = single_quote_list; *p; ++p) {
size_t len = strlen(*p);
if (size >= len && memcmp(text, *p, len) == 0) {
return len;
}
}
return 0;
}
/* Converts " or ' at very beginning or end of a word to left or right quote */
static int
smartypants_quotes(hoedown_buffer *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
char ent[8];
if (*is_open && !word_boundary(next_char))
return 0;
if (!(*is_open) && !word_boundary(previous_char))
return 0;
snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote);
*is_open = !(*is_open);
hoedown_buffer_puts(ob, ent);
return 1;
}
/*
Converts ' to left or right single quote; but the initial ' might be in
different forms, e.g. &apos; or &#39; or &#x27;.
'squote_text' points to the original single quote, and 'squote_size' is its length.
'text' points at the last character of the single-quote, e.g. ' or ;
*/
static size_t
smartypants_squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
const uint8_t *squote_text, size_t squote_size)
{
if (size >= 2) {
uint8_t t1 = tolower(text[1]);
size_t next_squote_len = squote_len(text+1, size-1);
/* convert '' to &ldquo; or &rdquo; */
if (next_squote_len > 0) {
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
return next_squote_len;
}
/* Tom's, isn't, I'm, I'd */
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
(size == 3 || word_boundary(text[2]))) {
HOEDOWN_BUFPUTSL(ob, "&rsquo;");
return 0;
}
/* you're, you'll, you've */
if (size >= 3) {
uint8_t t2 = tolower(text[2]);
if (((t1 == 'r' && t2 == 'e') ||
(t1 == 'l' && t2 == 'l') ||
(t1 == 'v' && t2 == 'e')) &&
(size == 4 || word_boundary(text[3]))) {
HOEDOWN_BUFPUTSL(ob, "&rsquo;");
return 0;
}
}
}
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
return 0;
hoedown_buffer_put(ob, squote_text, squote_size);
return 0;
}
/* Converts ' to left or right single quote. */
static size_t
smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
return smartypants_squote(ob, smrt, previous_char, text, size, text, 1);
}
/* Converts (c), (r), (tm) */
static size_t
smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 3) {
uint8_t t1 = tolower(text[1]);
uint8_t t2 = tolower(text[2]);
if (t1 == 'c' && t2 == ')') {
HOEDOWN_BUFPUTSL(ob, "&copy;");
return 2;
}
if (t1 == 'r' && t2 == ')') {
HOEDOWN_BUFPUTSL(ob, "&reg;");
return 2;
}
if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
HOEDOWN_BUFPUTSL(ob, "&trade;");
return 3;
}
}
hoedown_buffer_putc(ob, text[0]);
return 0;
}
/* Converts "--" to em-dash, etc. */
static size_t
smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 3 && text[1] == '-' && text[2] == '-') {
HOEDOWN_BUFPUTSL(ob, "&mdash;");
return 2;
}
if (size >= 2 && text[1] == '-') {
HOEDOWN_BUFPUTSL(ob, "&ndash;");
return 1;
}
hoedown_buffer_putc(ob, text[0]);
return 0;
}
/* Converts &quot; etc. */
static size_t
smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
size_t len;
if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
return 5;
}
len = squote_len(text, size);
if (len > 0) {
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
}
if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
return 3;
hoedown_buffer_putc(ob, '&');
return 0;
}
/* Converts "..." to ellipsis */
static size_t
smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 3 && text[1] == '.' && text[2] == '.') {
HOEDOWN_BUFPUTSL(ob, "&hellip;");
return 2;
}
if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
HOEDOWN_BUFPUTSL(ob, "&hellip;");
return 4;
}
hoedown_buffer_putc(ob, text[0]);
return 0;
}
/* Converts `` to opening double quote */
static size_t
smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 2 && text[1] == '`') {
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
return 1;
}
hoedown_buffer_putc(ob, text[0]);
return 0;
}
/* Converts 1/2, 1/4, 3/4 */
static size_t
smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (word_boundary(previous_char) && size >= 3) {
if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
if (size == 3 || word_boundary(text[3])) {
HOEDOWN_BUFPUTSL(ob, "&frac12;");
return 2;
}
}
if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
if (size == 3 || word_boundary(text[3]) ||
(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
HOEDOWN_BUFPUTSL(ob, "&frac14;");
return 2;
}
}
if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
if (size == 3 || word_boundary(text[3]) ||
(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
HOEDOWN_BUFPUTSL(ob, "&frac34;");
return 2;
}
}
}
hoedown_buffer_putc(ob, text[0]);
return 0;
}
/* Converts " to left or right double quote */
static size_t
smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
HOEDOWN_BUFPUTSL(ob, "&quot;");
return 0;
}
static size_t
smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
static const char *skip_tags[] = {
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
};
static const size_t skip_tags_count = 8;
size_t tag, i = 0;
/* This is a comment. Copy everything verbatim until --> or EOF is seen. */
if (i + 4 < size && memcmp(text, "<!--", 4) == 0) {
i += 4;
while (i + 3 < size && memcmp(text + i, "-->", 3) != 0)
i++;
i += 3;
hoedown_buffer_put(ob, text, i + 1);
return i;
}
while (i < size && text[i] != '>')
i++;
for (tag = 0; tag < skip_tags_count; ++tag) {
if (hoedown_html_is_tag(text, size, skip_tags[tag]) == HOEDOWN_HTML_TAG_OPEN)
break;
}
if (tag < skip_tags_count) {
for (;;) {
while (i < size && text[i] != '<')
i++;
if (i == size)
break;
if (hoedown_html_is_tag(text + i, size - i, skip_tags[tag]) == HOEDOWN_HTML_TAG_CLOSE)
break;
i++;
}
while (i < size && text[i] != '>')
i++;
}
hoedown_buffer_put(ob, text, i + 1);
return i;
}
static size_t
smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size < 2)
return 0;
switch (text[1]) {
case '\\':
case '"':
case '\'':
case '.':
case '-':
case '`':
hoedown_buffer_putc(ob, text[1]);
return 1;
default:
hoedown_buffer_putc(ob, '\\');
return 0;
}
}
#if 0
static struct {
uint8_t c0;
const uint8_t *pattern;
const uint8_t *entity;
int skip;
} smartypants_subs[] = {
{ '\'', "'s>", "&rsquo;", 0 },
{ '\'', "'t>", "&rsquo;", 0 },
{ '\'', "'re>", "&rsquo;", 0 },
{ '\'', "'ll>", "&rsquo;", 0 },
{ '\'', "'ve>", "&rsquo;", 0 },
{ '\'', "'m>", "&rsquo;", 0 },
{ '\'', "'d>", "&rsquo;", 0 },
{ '-', "--", "&mdash;", 1 },
{ '-', "<->", "&ndash;", 0 },
{ '.', "...", "&hellip;", 2 },
{ '.', ". . .", "&hellip;", 4 },
{ '(', "(c)", "&copy;", 2 },
{ '(', "(r)", "&reg;", 2 },
{ '(', "(tm)", "&trade;", 3 },
{ '3', "<3/4>", "&frac34;", 2 },
{ '3', "<3/4ths>", "&frac34;", 2 },
{ '1', "<1/2>", "&frac12;", 2 },
{ '1', "<1/4>", "&frac14;", 2 },
{ '1', "<1/4th>", "&frac14;", 2 },
{ '&', "&#0;", 0, 3 },
};
#endif
void
hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *text, size_t size)
{
size_t i;
struct smartypants_data smrt = {0, 0};
if (!text)
return;
hoedown_buffer_grow(ob, size);
for (i = 0; i < size; ++i) {
size_t org;
uint8_t action = 0;
org = i;
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
i++;
if (i > org)
hoedown_buffer_put(ob, text + org, i - org);
if (i < size) {
i += smartypants_cb_ptrs[(int)action]
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
}
}
}

79
src/hoedown/stack.c Normal file
View File

@ -0,0 +1,79 @@
#include "stack.h"
#include "buffer.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
void
hoedown_stack_init(hoedown_stack *st, size_t initial_size)
{
assert(st);
st->item = NULL;
st->size = st->asize = 0;
if (!initial_size)
initial_size = 8;
hoedown_stack_grow(st, initial_size);
}
void
hoedown_stack_uninit(hoedown_stack *st)
{
assert(st);
free(st->item);
}
void
hoedown_stack_grow(hoedown_stack *st, size_t neosz)
{
assert(st);
if (st->asize >= neosz)
return;
st->item = hoedown_realloc(st->item, neosz * sizeof(void *));
memset(st->item + st->asize, 0x0, (neosz - st->asize) * sizeof(void *));
st->asize = neosz;
if (st->size > neosz)
st->size = neosz;
}
void
hoedown_stack_push(hoedown_stack *st, void *item)
{
assert(st);
if (st->size >= st->asize)
hoedown_stack_grow(st, st->size * 2);
st->item[st->size++] = item;
}
void *
hoedown_stack_pop(hoedown_stack *st)
{
assert(st);
if (!st->size)
return NULL;
return st->item[--st->size];
}
void *
hoedown_stack_top(const hoedown_stack *st)
{
assert(st);
if (!st->size)
return NULL;
return st->item[st->size - 1];
}

52
src/hoedown/stack.h Normal file
View File

@ -0,0 +1,52 @@
/* stack.h - simple stacking */
#ifndef HOEDOWN_STACK_H
#define HOEDOWN_STACK_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*********
* TYPES *
*********/
struct hoedown_stack {
void **item;
size_t size;
size_t asize;
};
typedef struct hoedown_stack hoedown_stack;
/*************
* FUNCTIONS *
*************/
/* hoedown_stack_init: initialize a stack */
void hoedown_stack_init(hoedown_stack *st, size_t initial_size);
/* hoedown_stack_uninit: free internal data of the stack */
void hoedown_stack_uninit(hoedown_stack *st);
/* hoedown_stack_grow: increase the allocated size to the given value */
void hoedown_stack_grow(hoedown_stack *st, size_t neosz);
/* hoedown_stack_push: push an item to the top of the stack */
void hoedown_stack_push(hoedown_stack *st, void *item);
/* hoedown_stack_pop: retrieve and remove the item at the top of the stack */
void *hoedown_stack_pop(hoedown_stack *st);
/* hoedown_stack_top: retrieve the item at the top of the stack */
void *hoedown_stack_top(const hoedown_stack *st);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_STACK_H **/

9
src/hoedown/version.c Normal file
View File

@ -0,0 +1,9 @@
#include "version.h"
void
hoedown_version(int *major, int *minor, int *revision)
{
*major = HOEDOWN_VERSION_MAJOR;
*minor = HOEDOWN_VERSION_MINOR;
*revision = HOEDOWN_VERSION_REVISION;
}

33
src/hoedown/version.h Normal file
View File

@ -0,0 +1,33 @@
/* version.h - holds Hoedown's version */
#ifndef HOEDOWN_VERSION_H
#define HOEDOWN_VERSION_H
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
#define HOEDOWN_VERSION "3.0.3"
#define HOEDOWN_VERSION_MAJOR 3
#define HOEDOWN_VERSION_MINOR 0
#define HOEDOWN_VERSION_REVISION 3
/*************
* FUNCTIONS *
*************/
/* hoedown_version: retrieve Hoedown's version numbers */
void hoedown_version(int *major, int *minor, int *revision);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_VERSION_H **/

File diff suppressed because it is too large Load Diff

View File

@ -1,247 +0,0 @@
cimport sundown
cimport wrapper
from libc.stdint cimport uint8_t
__version__ = '1.0.3'
# Markdown extensions
EXT_NO_INTRA_EMPHASIS = (1 << 0)
EXT_TABLES = (1 << 1)
EXT_FENCED_CODE = (1 << 2)
EXT_AUTOLINK = (1 << 3)
EXT_STRIKETHROUGH = (1 << 4)
EXT_SPACE_HEADERS = (1 << 6)
EXT_SUPERSCRIPT = (1 << 7)
EXT_LAX_SPACING = (1 << 8)
# HTML Render flags
HTML_SKIP_HTML = (1 << 0)
HTML_SKIP_STYLE = (1 << 1)
HTML_SKIP_IMAGES = (1 << 2)
HTML_SKIP_LINKS = (1 << 3)
HTML_EXPAND_TABS = (1 << 4)
HTML_SAFELINK = (1 << 5)
HTML_TOC = (1 << 6)
HTML_HARD_WRAP = (1 << 7)
HTML_USE_XHTML = (1 << 8)
HTML_ESCAPE = (1 << 9)
# Extra HTML render flags - these are not from Sundown
HTML_SMARTYPANTS = (1 << 10) # An extra flag to enable Smartypants
HTML_TOC_TREE = (1 << 11) # Only render a table of contents tree
# Other flags
TABLE_ALIGN_L = 1 # MKD_TABLE_ALIGN_L
TABLE_ALIGN_R = 2 # MKD_TABLE_ALIGN_R
TABLE_ALIGN_C = 3 # MKD_TABLE_ALIGN_CENTER
TABLE_ALIGNMASK = 3 # MKD_TABLE_ALIGNMASK
TABLE_HEADER = 4 # MKD_TABLE_HEADER
def html(object text, unsigned int extensions=0, unsigned int render_flags=0):
"""Convert markdown text to (X)HTML.
Returns a unicode string.
:param text: A byte or unicode string.
:param extensions: Enable additional Markdown extensions with the ``EXT_*`` constants.
:param render_flags: Adjust HTML rendering behaviour with the ``HTML_*`` constants.
"""
if render_flags & HTML_TOC_TREE:
renderer = HtmlTocRenderer(render_flags)
else:
renderer = HtmlRenderer(render_flags)
markdown = Markdown(renderer, extensions)
result = markdown.render(text)
if render_flags & HTML_SMARTYPANTS:
result = SmartyPants().postprocess(result)
return result
cdef class SmartyPants:
"""Smartypants is a post-processor for (X)HTML renderers and can be used
standalone or as a mixin. It adds a methode named ``postprocess`` to the
renderer.
================================== ========
Source Result
================================== ========
`'s` (s, t, m, d, re, ll, ve) [1]_ &rsquo;s
`--` &mdash;
`-` &ndash;
`...` &hellip;
`. . .` &hellip;
`(c)` &copy;
`(r)` &reg;
`(tm)` &trade;
`3/4` &frac34;
`1/2` &frac12;
`1/4` &frac14;
================================== ========
.. [1] A ``'`` followed by a ``s``, ``t``, ``m``, ``d``, ``re``, ``ll`` or
``ve`` will be turned into ``&rsquo;s``, ``&rsquo;t``, and so on.
"""
def postprocess(self, object text):
"""Process the input text.
Returns a unicode string.
:param text: A byte or unicode string.
"""
# Convert string
cdef bytes py_string
if hasattr(text, 'encode'):
py_string = text.encode('UTF-8', 'strict')
else:
py_string = text
cdef char *c_string = py_string
cdef sundown.buf *ob = sundown.bufnew(128)
sundown.sdhtml_smartypants(ob,
<uint8_t *> c_string, len(c_string))
try:
return (<char *> ob.data)[:ob.size].decode('UTF-8', 'strict')
finally:
sundown.bufrelease(ob)
cdef class BaseRenderer:
"""The ``BaseRenderer`` is boilerplate code for creating your own renderers by
sublassing `BaseRenderer`. It takes care of setting the callbacks and flags.
:param flags: Available as a read-only, integer type attribute named ``self.flags``.
"""
cdef sundown.sd_callbacks callbacks
cdef wrapper.renderopt options
#: Read-only render flags
cdef readonly int flags
def __init__(self, int flags=0):
self.options.self = <void *> self
self.flags = flags
self.setup()
# Set callbacks
cdef void **source = <void **> &wrapper.callback_funcs
cdef void **dest = <void **> &self.callbacks
cdef unicode method_name
for i from 0 <= i < <int> wrapper.method_count by 1:
# In Python 3 ``wrapper.method_names[i]`` is a byte string.
# This means hasattr can't find any method in the renderer, so
# ``wrapper.method_names[i]`` is converted to a normal string first.
method_name = wrapper.method_names[i].decode('utf-8')
if hasattr(self, method_name):
dest[i] = source[i]
def setup(self):
"""A method that can be overridden by the renderer that sublasses ``BaseRenderer``.
It's called everytime an instance of a renderer is created.
"""
pass
cdef class HtmlRenderer(BaseRenderer):
"""The HTML renderer that's included in Sundown.
Do you override the ``setup`` method when subclassing ``HtmlRenderer``. If
you do make sure to call parent class' ``setup`` method first.
:param flags: Adjust HTML rendering behaviour with the ``HTML_*`` constants.
"""
def setup(self):
self.options.html.flags = self.flags
sundown.sdhtml_renderer(
&self.callbacks,
&self.options.html,
self.options.html.flags)
cdef class HtmlTocRenderer(BaseRenderer):
"""The HTML table of contents renderer that's included in Sundown.
Do you override the ``setup`` method when subclassing ``HtmlTocRenderer``.
If you do make sure to call parent class' ``setup`` method first.
:param flags: Adjust HTML rendering behaviour with the ``HTML_*`` constants.
"""
def setup(self, int flags=0):
sundown.sdhtml_toc_renderer(
&self.callbacks,
&self.options.html)
cdef class Markdown:
"""The Markdown parser.
:param renderer: An instance of ``BaseRenderer``.
:param extensions: Enable additional Markdown extensions with the ``EXT_*`` constants.
"""
cdef sundown.sd_markdown *markdown
cdef BaseRenderer renderer
def __cinit__(self, object renderer, int extensions=0):
if not isinstance(renderer, BaseRenderer):
raise ValueError('expected instance of BaseRenderer, %s found' % \
renderer.__class__.__name__)
self.renderer = <BaseRenderer> renderer
self.markdown = sundown.sd_markdown_new(
extensions, 16,
&self.renderer.callbacks,
<sundown.html_renderopt *> &self.renderer.options)
def render(self, object text):
"""Render the Markdon text.
Returns a unicode string.
:param text: A byte or unicode string.
"""
if hasattr(self.renderer, 'preprocess'):
text = self.renderer.preprocess(text)
# Convert string
cdef bytes py_string
if hasattr(text, 'encode'):
py_string = text.encode('UTF-8', 'strict')
else:
py_string = text # If it's a byte string it's assumed it's UTF-8
cdef char *c_string = py_string
# Buffers
cdef sundown.buf *ib = sundown.bufnew(128)
sundown.bufputs(ib, c_string)
cdef sundown.buf *ob = sundown.bufnew(128)
sundown.bufgrow(ob, <size_t> (ib.size * 1.4))
# Parse! And make a unicode string
sundown.sd_markdown_render(ob, ib.data, ib.size, self.markdown)
text = (<char *> ob.data)[:ob.size].decode('UTF-8', 'strict')
if hasattr(self.renderer, 'postprocess'):
text = self.renderer.postprocess(text)
# Return a string and release buffers
try:
return text
finally:
sundown.bufrelease(ob)
sundown.bufrelease(ib)
def __dealloc__(self):
if self.markdown is not NULL:
sundown.sd_markdown_free(self.markdown)

View File

@ -1,97 +0,0 @@
from libc.stdint cimport uint8_t
cdef extern from 'sundown/buffer.h':
struct buf:
uint8_t *data
size_t size
size_t asize
size_t unit
buf* bufnew(size_t)
int bufgrow(buf *, size_t)
void bufcstr(buf *)
void bufrelease(buf *)
void bufputs(buf *, char *)
cdef extern from 'sundown/html.h':
struct _toc_data_st:
int header_count
int current_level
struct html_renderopt:
_toc_data_st toc_data
unsigned int flags
void (*link_attributes)(buf *ob, buf *url, void *self)
void sdhtml_renderer(
sd_callbacks *callbacks,
html_renderopt *options_ptr,
unsigned int render_flags)
void sdhtml_toc_renderer(
sd_callbacks *callbacks,
html_renderopt *options_ptr)
void sdhtml_smartypants(
buf *ob,
uint8_t *text,
size_t size)
cdef extern from 'sundown/markdown.h':
enum mkd_autolink:
pass
struct sd_callbacks:
# Block level callbacks - NULL skips the block
void (*blockcode)(buf *ob, buf *text, buf *lang, void *opaque)
void (*blockquote)(buf *ob, buf *text, void *opaque)
void (*blockhtml)(buf *ob, buf *text, void *opaque)
void (*header)(buf *ob, buf *text, int level, void *opaque)
void (*hrule)(buf *ob, void *opaque)
void (*list)(buf *ob, buf *text, int flags, void *opaque)
void (*listitem)(buf *ob, buf *text, int flags, void *opaque)
void (*paragraph)(buf *ob, buf *text, void *opaque)
void (*table)(buf *ob, buf *header, buf *body, void *opaque)
void (*table_row)(buf *ob, buf *text, void *opaque)
void (*table_cell)(buf *ob, buf *text, int flags, void *opaque)
# Span level callbacks - NULL or return 0 prints the span verbatim
int (*autolink)(buf *ob, buf *link, mkd_autolink type, void *opaque)
int (*codespan)(buf *ob, buf *text, void *opaque)
int (*double_emphasis)(buf *ob, buf *text, void *opaque)
int (*emphasis)(buf *ob, buf *text, void *opaque)
int (*image)(buf *ob, buf *link, buf *title, buf *alt, void *opaque)
int (*linebreak)(buf *ob, void *opaque)
int (*link)(buf *ob, buf *link, buf *title, buf *content, void *opaque)
int (*raw_html_tag)(buf *ob, buf *tag, void *opaque)
int (*triple_emphasis)(buf *ob, buf *text, void *opaque)
int (*strikethrough)(buf *ob, buf *text, void *opaque)
int (*superscript)(buf *ob, buf *text, void *opaque)
# Low level callbacks - NULL copies input directly into the output
void (*entity)(buf *ob, buf *entity, void *opaque)
void (*normal_text)(buf *ob, buf *text, void *opaque)
# Header and footer
void (*doc_header)(buf *ob, void *opaque)
void (*doc_footer)(buf *ob, void *opaque)
enum mkd_autolink:
pass
struct sd_markdown:
pass
sd_markdown *sd_markdown_new(
unsigned int extensions,
size_t max_nesting,
sd_callbacks *callbacks,
html_renderopt *opaque)
void sd_markdown_render(
buf *ob,
uint8_t *document,
size_t doc_size,
sd_markdown *md)
void sd_markdown_free(sd_markdown *md)
void sd_version(int *major, int *minor, int *revision)

View File

@ -1,51 +0,0 @@
/*
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UPSKIRT_AUTOLINK_H
#define UPSKIRT_AUTOLINK_H
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
enum {
SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
};
int
sd_autolink_issafe(const uint8_t *link, size_t link_len);
size_t
sd_autolink__www(size_t *rewind_p, struct buf *link,
uint8_t *data, size_t offset, size_t size, unsigned int flags);
size_t
sd_autolink__email(size_t *rewind_p, struct buf *link,
uint8_t *data, size_t offset, size_t size, unsigned int flags);
size_t
sd_autolink__url(size_t *rewind_p, struct buf *link,
uint8_t *data, size_t offset, size_t size, unsigned int flags);
#ifdef __cplusplus
}
#endif
#endif
/* vim: set filetype=c: */

View File

@ -1,225 +0,0 @@
/*
* Copyright (c) 2008, Natacha Porté
* Copyright (c) 2011, Vicent Martí
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
#include "buffer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* MSVC compat */
#if defined(_MSC_VER)
# define _buf_vsnprintf _vsnprintf
#else
# define _buf_vsnprintf vsnprintf
#endif
int
bufprefix(const struct buf *buf, const char *prefix)
{
size_t i;
assert(buf && buf->unit);
for (i = 0; i < buf->size; ++i) {
if (prefix[i] == 0)
return 0;
if (buf->data[i] != prefix[i])
return buf->data[i] - prefix[i];
}
return 0;
}
/* bufgrow: increasing the allocated size to the given value */
int
bufgrow(struct buf *buf, size_t neosz)
{
size_t neoasz;
void *neodata;
assert(buf && buf->unit);
if (neosz > BUFFER_MAX_ALLOC_SIZE)
return BUF_ENOMEM;
if (buf->asize >= neosz)
return BUF_OK;
neoasz = buf->asize + buf->unit;
while (neoasz < neosz)
neoasz += buf->unit;
neodata = realloc(buf->data, neoasz);
if (!neodata)
return BUF_ENOMEM;
buf->data = neodata;
buf->asize = neoasz;
return BUF_OK;
}
/* bufnew: allocation of a new buffer */
struct buf *
bufnew(size_t unit)
{
struct buf *ret;
ret = malloc(sizeof (struct buf));
if (ret) {
ret->data = 0;
ret->size = ret->asize = 0;
ret->unit = unit;
}
return ret;
}
/* bufnullterm: NULL-termination of the string array */
const char *
bufcstr(struct buf *buf)
{
assert(buf && buf->unit);
if (buf->size < buf->asize && buf->data[buf->size] == 0)
return (char *)buf->data;
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
buf->data[buf->size] = 0;
return (char *)buf->data;
}
return NULL;
}
/* bufprintf: formatted printing to a buffer */
void
bufprintf(struct buf *buf, const char *fmt, ...)
{
va_list ap;
int n;
assert(buf && buf->unit);
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
return;
va_start(ap, fmt);
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
if (n < 0) {
#ifdef _MSC_VER
va_start(ap, fmt);
n = _vscprintf(fmt, ap);
va_end(ap);
#else
return;
#endif
}
if ((size_t)n >= buf->asize - buf->size) {
if (bufgrow(buf, buf->size + n + 1) < 0)
return;
va_start(ap, fmt);
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
}
if (n < 0)
return;
buf->size += n;
}
/* bufput: appends raw data to a buffer */
void
bufput(struct buf *buf, const void *data, size_t len)
{
assert(buf && buf->unit);
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
return;
memcpy(buf->data + buf->size, data, len);
buf->size += len;
}
/* bufputs: appends a NUL-terminated string to a buffer */
void
bufputs(struct buf *buf, const char *str)
{
bufput(buf, str, strlen(str));
}
/* bufputc: appends a single uint8_t to a buffer */
void
bufputc(struct buf *buf, int c)
{
assert(buf && buf->unit);
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
return;
buf->data[buf->size] = c;
buf->size += 1;
}
/* bufrelease: decrease the reference count and free the buffer if needed */
void
bufrelease(struct buf *buf)
{
if (!buf)
return;
free(buf->data);
free(buf);
}
/* bufreset: frees internal data of the buffer */
void
bufreset(struct buf *buf)
{
if (!buf)
return;
free(buf->data);
buf->data = NULL;
buf->size = buf->asize = 0;
}
/* bufslurp: removes a given number of bytes from the head of the array */
void
bufslurp(struct buf *buf, size_t len)
{
assert(buf && buf->unit);
if (len >= buf->size) {
buf->size = 0;
return;
}
buf->size -= len;
memmove(buf->data, buf->data + len, buf->size);
}

View File

@ -1,96 +0,0 @@
/*
* Copyright (c) 2008, Natacha Porté
* Copyright (c) 2011, Vicent Martí
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef BUFFER_H__
#define BUFFER_H__
#include <stddef.h>
#include <stdarg.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_MSC_VER)
#define __attribute__(x)
#define inline
#endif
typedef enum {
BUF_OK = 0,
BUF_ENOMEM = -1,
} buferror_t;
/* struct buf: character array buffer */
struct buf {
uint8_t *data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
};
/* CONST_BUF: global buffer from a string litteral */
#define BUF_STATIC(string) \
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
/* VOLATILE_BUF: macro for creating a volatile buffer on the stack */
#define BUF_VOLATILE(strname) \
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
/* BUFPUTSL: optimized bufputs of a string litteral */
#define BUFPUTSL(output, literal) \
bufput(output, literal, sizeof literal - 1)
/* bufgrow: increasing the allocated size to the given value */
int bufgrow(struct buf *, size_t);
/* bufnew: allocation of a new buffer */
struct buf *bufnew(size_t) __attribute__ ((malloc));
/* bufnullterm: NUL-termination of the string array (making a C-string) */
const char *bufcstr(struct buf *);
/* bufprefix: compare the beginning of a buffer with a string */
int bufprefix(const struct buf *buf, const char *prefix);
/* bufput: appends raw data to a buffer */
void bufput(struct buf *, const void *, size_t);
/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *, const char *);
/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *, int);
/* bufrelease: decrease the reference count and free the buffer if needed */
void bufrelease(struct buf *);
/* bufreset: frees internal data of the buffer */
void bufreset(struct buf *);
/* bufslurp: removes a given number of bytes from the head of the array */
void bufslurp(struct buf *, size_t);
/* bufprintf: formatted printing to a buffer */
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,37 +0,0 @@
#ifndef HOUDINI_H__
#define HOUDINI_H__
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef HOUDINI_USE_LOCALE
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
* Helper _isdigit methods -- do not trust the current locale
* */
# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,108 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
/*
* The following characters will not be escaped:
*
* -_.+!*'(),%#@?=;:/,+&$ alphanum
*
* Note that this character set is the addition of:
*
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
* We asume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
* have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
*
* There are two exceptions: the chacters & (amp)
* and ' (single quote) do not appear in the table.
* They are meant to appear in the URL as components,
* yet they require special HTML-entity escaping
* to generate valid HTML markup.
*
* All other characters will be escaped to %XX.
*
*/
static const char HREF_SAFE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
void
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
{
static const char hex_chars[] = "0123456789ABCDEF";
size_t i = 0, org;
char hex_str[3];
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
hex_str[0] = '%';
while (i < size) {
org = i;
while (i < size && HREF_SAFE[src[i]] != 0)
i++;
if (i > org)
bufput(ob, src + org, i - org);
/* escaping */
if (i >= size)
break;
switch (src[i]) {
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
BUFPUTSL(ob, "&amp;");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
BUFPUTSL(ob, "&#x27;");
break;
/* the space can be escaped to %20 or a plus
* sign. we're going with the generic escape
* for now. the plus thing is more commonly seen
* when building GET strings */
#if 0
case ' ':
bufputc(ob, '+');
break;
#endif
/* every other character goes with a %XX escaping */
default:
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
hex_str[2] = hex_chars[src[i] & 0xF];
bufput(ob, hex_str, 3);
}
i++;
}
}

View File

@ -1,84 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
/**
* According to the OWASP rules:
*
* & --> &amp;
* < --> &lt;
* > --> &gt;
* " --> &quot;
* ' --> &#x27; &apos; is not recommended
* / --> &#x2F; forward slash is included as it helps end an HTML entity
*
*/
static const char HTML_ESCAPE_TABLE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const char *HTML_ESCAPES[] = {
"",
"&quot;",
"&amp;",
"&#39;",
"&#47;",
"&lt;",
"&gt;"
};
void
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
{
size_t i = 0, org, esc = 0;
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
while (i < size) {
org = i;
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
i++;
if (i > org)
bufput(ob, src + org, i - org);
/* escaping */
if (i >= size)
break;
/* The forward slash is only escaped in secure mode */
if (src[i] == '/' && !secure) {
bufputc(ob, '/');
} else {
bufputs(ob, HTML_ESCAPES[esc]);
}
i++;
}
}
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
houdini_escape_html0(ob, src, size, 1);
}

View File

@ -1,635 +0,0 @@
/*
* Copyright (c) 2009, Natacha Porté
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "markdown.h"
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "houdini.h"
#define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML)
int
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
{
size_t i;
int closed = 0;
if (tag_size < 3 || tag_data[0] != '<')
return HTML_TAG_NONE;
i = 1;
if (tag_data[i] == '/') {
closed = 1;
i++;
}
for (; i < tag_size; ++i, ++tagname) {
if (*tagname == 0)
break;
if (tag_data[i] != *tagname)
return HTML_TAG_NONE;
}
if (i == tag_size)
return HTML_TAG_NONE;
if (isspace(tag_data[i]) || tag_data[i] == '>')
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
return HTML_TAG_NONE;
}
static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length)
{
houdini_escape_html0(ob, source, length, 0);
}
static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length)
{
houdini_escape_href(ob, source, length);
}
/********************
* GENERIC RENDERER *
********************/
static int
rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
{
struct html_renderopt *options = opaque;
if (!link || !link->size)
return 0;
if ((options->flags & HTML_SAFELINK) != 0 &&
!sd_autolink_issafe(link->data, link->size) &&
type != MKDA_EMAIL)
return 0;
BUFPUTSL(ob, "<a href=\"");
if (type == MKDA_EMAIL)
BUFPUTSL(ob, "mailto:");
escape_href(ob, link->data, link->size);
if (options->link_attributes) {
bufputc(ob, '\"');
options->link_attributes(ob, link, opaque);
bufputc(ob, '>');
} else {
BUFPUTSL(ob, "\">");
}
/*
* Pretty printing: if we get an email address as
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
* want to print the `mailto:` prefix
*/
if (bufprefix(link, "mailto:") == 0) {
escape_html(ob, link->data + 7, link->size - 7);
} else {
escape_html(ob, link->data, link->size);
}
BUFPUTSL(ob, "</a>");
return 1;
}
static void
rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
{
if (ob->size) bufputc(ob, '\n');
if (lang && lang->size) {
size_t i, cls;
BUFPUTSL(ob, "<pre><code class=\"");
for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
while (i < lang->size && isspace(lang->data[i]))
i++;
if (i < lang->size) {
size_t org = i;
while (i < lang->size && !isspace(lang->data[i]))
i++;
if (lang->data[org] == '.')
org++;
if (cls) bufputc(ob, ' ');
escape_html(ob, lang->data + org, i - org);
}
}
BUFPUTSL(ob, "\">");
} else
BUFPUTSL(ob, "<pre><code>");
if (text)
escape_html(ob, text->data, text->size);
BUFPUTSL(ob, "</code></pre>\n");
}
static void
rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
{
if (ob->size) bufputc(ob, '\n');
BUFPUTSL(ob, "<blockquote>\n");
if (text) bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</blockquote>\n");
}
static int
rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
{
BUFPUTSL(ob, "<code>");
if (text) escape_html(ob, text->data, text->size);
BUFPUTSL(ob, "</code>");
return 1;
}
static int
rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
{
if (!text || !text->size)
return 0;
BUFPUTSL(ob, "<del>");
bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</del>");
return 1;
}
static int
rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
{
if (!text || !text->size)
return 0;
BUFPUTSL(ob, "<strong>");
bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</strong>");
return 1;
}
static int
rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
{
if (!text || !text->size) return 0;
BUFPUTSL(ob, "<em>");
if (text) bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</em>");
return 1;
}
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
struct html_renderopt *options = opaque;
bufputs(ob, USE_XHTML(options) ? "<br/>\n" : "<br>\n");
return 1;
}
static void
rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
{
struct html_renderopt *options = opaque;
if (ob->size)
bufputc(ob, '\n');
if (options->flags & HTML_TOC)
bufprintf(ob, "<h%d id=\"toc_%d\">", level, options->toc_data.header_count++);
else
bufprintf(ob, "<h%d>", level);
if (text) bufput(ob, text->data, text->size);
bufprintf(ob, "</h%d>\n", level);
}
static int
rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
{
struct html_renderopt *options = opaque;
if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
return 0;
BUFPUTSL(ob, "<a href=\"");
if (link && link->size)
escape_href(ob, link->data, link->size);
if (title && title->size) {
BUFPUTSL(ob, "\" title=\"");
escape_html(ob, title->data, title->size);
}
if (options->link_attributes) {
bufputc(ob, '\"');
options->link_attributes(ob, link, opaque);
bufputc(ob, '>');
} else {
BUFPUTSL(ob, "\">");
}
if (content && content->size) bufput(ob, content->data, content->size);
BUFPUTSL(ob, "</a>");
return 1;
}
static void
rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
{
if (ob->size) bufputc(ob, '\n');
bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
if (text) bufput(ob, text->data, text->size);
bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
}
static void
rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
{
BUFPUTSL(ob, "<li>");
if (text) {
size_t size = text->size;
while (size && text->data[size - 1] == '\n')
size--;
bufput(ob, text->data, size);
}
BUFPUTSL(ob, "</li>\n");
}
static void
rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
{
struct html_renderopt *options = opaque;
size_t i = 0;
if (ob->size) bufputc(ob, '\n');
if (!text || !text->size)
return;
while (i < text->size && isspace(text->data[i])) i++;
if (i == text->size)
return;
BUFPUTSL(ob, "<p>");
if (options->flags & HTML_HARD_WRAP) {
size_t org;
while (i < text->size) {
org = i;
while (i < text->size && text->data[i] != '\n')
i++;
if (i > org)
bufput(ob, text->data + org, i - org);
/*
* do not insert a line break if this newline
* is the last character on the paragraph
*/
if (i >= text->size - 1)
break;
rndr_linebreak(ob, opaque);
i++;
}
} else {
bufput(ob, &text->data[i], text->size - i);
}
BUFPUTSL(ob, "</p>\n");
}
static void
rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
{
size_t org, sz;
if (!text) return;
sz = text->size;
while (sz > 0 && text->data[sz - 1] == '\n') sz--;
org = 0;
while (org < sz && text->data[org] == '\n') org++;
if (org >= sz) return;
if (ob->size) bufputc(ob, '\n');
bufput(ob, text->data + org, sz - org);
bufputc(ob, '\n');
}
static int
rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
{
if (!text || !text->size) return 0;
BUFPUTSL(ob, "<strong><em>");
bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</em></strong>");
return 1;
}
static void
rndr_hrule(struct buf *ob, void *opaque)
{
struct html_renderopt *options = opaque;
if (ob->size) bufputc(ob, '\n');
bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
}
static int
rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
{
struct html_renderopt *options = opaque;
if (!link || !link->size) return 0;
BUFPUTSL(ob, "<img src=\"");
escape_href(ob, link->data, link->size);
BUFPUTSL(ob, "\" alt=\"");
if (alt && alt->size)
escape_html(ob, alt->data, alt->size);
if (title && title->size) {
BUFPUTSL(ob, "\" title=\"");
escape_html(ob, title->data, title->size); }
bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
return 1;
}
static int
rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
{
struct html_renderopt *options = opaque;
/* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
* It doens't see if there are any valid tags, just escape all of them. */
if((options->flags & HTML_ESCAPE) != 0) {
escape_html(ob, text->data, text->size);
return 1;
}
if ((options->flags & HTML_SKIP_HTML) != 0)
return 1;
if ((options->flags & HTML_SKIP_STYLE) != 0 &&
sdhtml_is_tag(text->data, text->size, "style"))
return 1;
if ((options->flags & HTML_SKIP_LINKS) != 0 &&
sdhtml_is_tag(text->data, text->size, "a"))
return 1;
if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
sdhtml_is_tag(text->data, text->size, "img"))
return 1;
bufput(ob, text->data, text->size);
return 1;
}
static void
rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
{
if (ob->size) bufputc(ob, '\n');
BUFPUTSL(ob, "<table><thead>\n");
if (header)
bufput(ob, header->data, header->size);
BUFPUTSL(ob, "</thead><tbody>\n");
if (body)
bufput(ob, body->data, body->size);
BUFPUTSL(ob, "</tbody></table>\n");
}
static void
rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
{
BUFPUTSL(ob, "<tr>\n");
if (text)
bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</tr>\n");
}
static void
rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque)
{
if (flags & MKD_TABLE_HEADER) {
BUFPUTSL(ob, "<th");
} else {
BUFPUTSL(ob, "<td");
}
switch (flags & MKD_TABLE_ALIGNMASK) {
case MKD_TABLE_ALIGN_CENTER:
BUFPUTSL(ob, " align=\"center\">");
break;
case MKD_TABLE_ALIGN_L:
BUFPUTSL(ob, " align=\"left\">");
break;
case MKD_TABLE_ALIGN_R:
BUFPUTSL(ob, " align=\"right\">");
break;
default:
BUFPUTSL(ob, ">");
}
if (text)
bufput(ob, text->data, text->size);
if (flags & MKD_TABLE_HEADER) {
BUFPUTSL(ob, "</th>\n");
} else {
BUFPUTSL(ob, "</td>\n");
}
}
static int
rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
{
if (!text || !text->size) return 0;
BUFPUTSL(ob, "<sup>");
bufput(ob, text->data, text->size);
BUFPUTSL(ob, "</sup>");
return 1;
}
static void
rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
{
if (text)
escape_html(ob, text->data, text->size);
}
static void
toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
{
struct html_renderopt *options = opaque;
/* set the level offset if this is the first header
* we're parsing for the document */
if (options->toc_data.current_level == 0) {
options->toc_data.level_offset = level - 1;
}
level -= options->toc_data.level_offset;
if (level > options->toc_data.current_level) {
while (level > options->toc_data.current_level) {
BUFPUTSL(ob, "<ul>\n<li>\n");
options->toc_data.current_level++;
}
} else if (level < options->toc_data.current_level) {
BUFPUTSL(ob, "</li>\n");
while (level < options->toc_data.current_level) {
BUFPUTSL(ob, "</ul>\n</li>\n");
options->toc_data.current_level--;
}
BUFPUTSL(ob,"<li>\n");
} else {
BUFPUTSL(ob,"</li>\n<li>\n");
}
bufprintf(ob, "<a href=\"#toc_%d\">", options->toc_data.header_count++);
if (text)
escape_html(ob, text->data, text->size);
BUFPUTSL(ob, "</a>\n");
}
static int
toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
{
if (content && content->size)
bufput(ob, content->data, content->size);
return 1;
}
static void
toc_finalize(struct buf *ob, void *opaque)
{
struct html_renderopt *options = opaque;
while (options->toc_data.current_level > 0) {
BUFPUTSL(ob, "</li>\n</ul>\n");
options->toc_data.current_level--;
}
}
void
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
{
static const struct sd_callbacks cb_default = {
NULL,
NULL,
NULL,
toc_header,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
NULL,
NULL,
toc_link,
NULL,
rndr_triple_emphasis,
rndr_strikethrough,
rndr_superscript,
NULL,
NULL,
NULL,
toc_finalize,
};
memset(options, 0x0, sizeof(struct html_renderopt));
options->flags = HTML_TOC;
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
}
void
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
{
static const struct sd_callbacks cb_default = {
rndr_blockcode,
rndr_blockquote,
rndr_raw_block,
rndr_header,
rndr_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
rndr_table,
rndr_tablerow,
rndr_tablecell,
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
rndr_image,
rndr_linebreak,
rndr_link,
rndr_raw_html,
rndr_triple_emphasis,
rndr_strikethrough,
rndr_superscript,
NULL,
rndr_normal_text,
NULL,
NULL,
};
/* Prepare the options pointer */
memset(options, 0x0, sizeof(struct html_renderopt));
options->flags = render_flags;
/* Prepare the callbacks */
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
if (render_flags & HTML_SKIP_IMAGES)
callbacks->image = NULL;
if (render_flags & HTML_SKIP_LINKS) {
callbacks->link = NULL;
callbacks->autolink = NULL;
}
if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
callbacks->blockhtml = NULL;
}

View File

@ -1,77 +0,0 @@
/*
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UPSKIRT_HTML_H
#define UPSKIRT_HTML_H
#include "markdown.h"
#include "buffer.h"
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
struct html_renderopt {
struct {
int header_count;
int current_level;
int level_offset;
} toc_data;
unsigned int flags;
/* extra callbacks */
void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
typedef enum {
HTML_SKIP_HTML = (1 << 0),
HTML_SKIP_STYLE = (1 << 1),
HTML_SKIP_IMAGES = (1 << 2),
HTML_SKIP_LINKS = (1 << 3),
HTML_EXPAND_TABS = (1 << 4),
HTML_SAFELINK = (1 << 5),
HTML_TOC = (1 << 6),
HTML_HARD_WRAP = (1 << 7),
HTML_USE_XHTML = (1 << 8),
HTML_ESCAPE = (1 << 9),
} html_render_mode;
typedef enum {
HTML_TAG_NONE = 0,
HTML_TAG_OPEN,
HTML_TAG_CLOSE,
} html_tag;
int
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
extern void
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
extern void
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
extern void
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,206 +0,0 @@
/* C code produced by gperf version 3.0.3 */
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
/* Computed positions: -k'1-2' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
/* maximum key range = 37, duplicates = 0 */
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
static unsigned char gperf_downcase[256] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255
};
#endif
#ifndef GPERF_CASE_STRNCMP
#define GPERF_CASE_STRNCMP 1
static int
gperf_case_strncmp (s1, s2, n)
register const char *s1;
register const char *s2;
register unsigned int n;
{
for (; n > 0;)
{
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
if (c1 != 0 && c1 == c2)
{
n--;
continue;
}
return (int)c1 - (int)c2;
}
return 0;
}
#endif
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag (str, len)
register const char *str;
register unsigned int len;
{
static const unsigned char asso_values[] =
{
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 0, 38, 0, 38,
5, 5, 5, 15, 0, 38, 38, 0, 15, 10,
0, 38, 38, 15, 0, 5, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 0, 38,
0, 38, 5, 5, 5, 15, 0, 38, 38, 0,
15, 10, 0, 38, 38, 15, 0, 5, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38
};
register int hval = len;
switch (hval)
{
default:
hval += asso_values[(unsigned char)str[1]+1];
/*FALLTHROUGH*/
case 1:
hval += asso_values[(unsigned char)str[0]];
break;
}
return hval;
}
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag (str, len)
register const char *str;
register unsigned int len;
{
enum
{
TOTAL_KEYWORDS = 24,
MIN_WORD_LENGTH = 1,
MAX_WORD_LENGTH = 10,
MIN_HASH_VALUE = 1,
MAX_HASH_VALUE = 37
};
static const char * const wordlist[] =
{
"",
"p",
"dl",
"div",
"math",
"table",
"",
"ul",
"del",
"form",
"blockquote",
"figure",
"ol",
"fieldset",
"",
"h1",
"",
"h6",
"pre",
"", "",
"script",
"h5",
"noscript",
"",
"style",
"iframe",
"h4",
"ins",
"", "", "",
"h3",
"", "", "", "",
"h2"
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
{
register int key = hash_block_tag (str, len);
if (key <= MAX_HASH_VALUE && key >= 0)
{
register const char *s = wordlist[key];
if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
return s;
}
}
return 0;
}

View File

@ -1,389 +0,0 @@
/*
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "buffer.h"
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#if defined(_WIN32)
#define snprintf _snprintf
#endif
struct smartypants_data {
int in_squote;
int in_dquote;
};
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t (*smartypants_cb_ptrs[])
(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
NULL, /* 0 */
smartypants_cb__dash, /* 1 */
smartypants_cb__parens, /* 2 */
smartypants_cb__squote, /* 3 */
smartypants_cb__dquote, /* 4 */
smartypants_cb__amp, /* 5 */
smartypants_cb__period, /* 6 */
smartypants_cb__number, /* 7 */
smartypants_cb__ltag, /* 8 */
smartypants_cb__backtick, /* 9 */
smartypants_cb__escape, /* 10 */
};
static const uint8_t smartypants_cb_chars[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static inline int
word_boundary(uint8_t c)
{
return c == 0 || isspace(c) || ispunct(c);
}
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
char ent[8];
if (*is_open && !word_boundary(next_char))
return 0;
if (!(*is_open) && !word_boundary(previous_char))
return 0;
snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote);
*is_open = !(*is_open);
bufputs(ob, ent);
return 1;
}
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 2) {
uint8_t t1 = tolower(text[1]);
if (t1 == '\'') {
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
return 1;
}
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
(size == 3 || word_boundary(text[2]))) {
BUFPUTSL(ob, "&rsquo;");
return 0;
}
if (size >= 3) {
uint8_t t2 = tolower(text[2]);
if (((t1 == 'r' && t2 == 'e') ||
(t1 == 'l' && t2 == 'l') ||
(t1 == 'v' && t2 == 'e')) &&
(size == 4 || word_boundary(text[3]))) {
BUFPUTSL(ob, "&rsquo;");
return 0;
}
}
}
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
return 0;
bufputc(ob, text[0]);
return 0;
}
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 3) {
uint8_t t1 = tolower(text[1]);
uint8_t t2 = tolower(text[2]);
if (t1 == 'c' && t2 == ')') {
BUFPUTSL(ob, "&copy;");
return 2;
}
if (t1 == 'r' && t2 == ')') {
BUFPUTSL(ob, "&reg;");
return 2;
}
if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
BUFPUTSL(ob, "&trade;");
return 3;
}
}
bufputc(ob, text[0]);
return 0;
}
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 3 && text[1] == '-' && text[2] == '-') {
BUFPUTSL(ob, "&mdash;");
return 2;
}
if (size >= 2 && text[1] == '-') {
BUFPUTSL(ob, "&ndash;");
return 1;
}
bufputc(ob, text[0]);
return 0;
}
static size_t
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
return 5;
}
if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
return 3;
bufputc(ob, '&');
return 0;
}
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 3 && text[1] == '.' && text[2] == '.') {
BUFPUTSL(ob, "&hellip;");
return 2;
}
if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
BUFPUTSL(ob, "&hellip;");
return 4;
}
bufputc(ob, text[0]);
return 0;
}
static size_t
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size >= 2 && text[1] == '`') {
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
return 1;
}
return 0;
}
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (word_boundary(previous_char) && size >= 3) {
if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
if (size == 3 || word_boundary(text[3])) {
BUFPUTSL(ob, "&frac12;");
return 2;
}
}
if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
if (size == 3 || word_boundary(text[3]) ||
(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
BUFPUTSL(ob, "&frac14;");
return 2;
}
}
if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
if (size == 3 || word_boundary(text[3]) ||
(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
BUFPUTSL(ob, "&frac34;");
return 2;
}
}
}
bufputc(ob, text[0]);
return 0;
}
static size_t
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
BUFPUTSL(ob, "&quot;");
return 0;
}
static size_t
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
static const char *skip_tags[] = {
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
};
static const size_t skip_tags_count = 8;
size_t tag, i = 0;
while (i < size && text[i] != '>')
i++;
for (tag = 0; tag < skip_tags_count; ++tag) {
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
break;
}
if (tag < skip_tags_count) {
for (;;) {
while (i < size && text[i] != '<')
i++;
if (i == size)
break;
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
break;
i++;
}
while (i < size && text[i] != '>')
i++;
}
bufput(ob, text, i + 1);
return i;
}
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
if (size < 2)
return 0;
switch (text[1]) {
case '\\':
case '"':
case '\'':
case '.':
case '-':
case '`':
bufputc(ob, text[1]);
return 1;
default:
bufputc(ob, '\\');
return 0;
}
}
#if 0
static struct {
uint8_t c0;
const uint8_t *pattern;
const uint8_t *entity;
int skip;
} smartypants_subs[] = {
{ '\'', "'s>", "&rsquo;", 0 },
{ '\'', "'t>", "&rsquo;", 0 },
{ '\'', "'re>", "&rsquo;", 0 },
{ '\'', "'ll>", "&rsquo;", 0 },
{ '\'', "'ve>", "&rsquo;", 0 },
{ '\'', "'m>", "&rsquo;", 0 },
{ '\'', "'d>", "&rsquo;", 0 },
{ '-', "--", "&mdash;", 1 },
{ '-', "<->", "&ndash;", 0 },
{ '.', "...", "&hellip;", 2 },
{ '.', ". . .", "&hellip;", 4 },
{ '(', "(c)", "&copy;", 2 },
{ '(', "(r)", "&reg;", 2 },
{ '(', "(tm)", "&trade;", 3 },
{ '3', "<3/4>", "&frac34;", 2 },
{ '3', "<3/4ths>", "&frac34;", 2 },
{ '1', "<1/2>", "&frac12;", 2 },
{ '1', "<1/4>", "&frac14;", 2 },
{ '1', "<1/4th>", "&frac14;", 2 },
{ '&', "&#0;", 0, 3 },
};
#endif
void
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
{
size_t i;
struct smartypants_data smrt = {0, 0};
if (!text)
return;
bufgrow(ob, size);
for (i = 0; i < size; ++i) {
size_t org;
uint8_t action = 0;
org = i;
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
i++;
if (i > org)
bufput(ob, text + org, i - org);
if (i < size) {
i += smartypants_cb_ptrs[(int)action]
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,138 +0,0 @@
/* markdown.h - generic markdown parser */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UPSKIRT_MARKDOWN_H
#define UPSKIRT_MARKDOWN_H
#include "buffer.h"
#include "autolink.h"
#ifdef __cplusplus
extern "C" {
#endif
#define SUNDOWN_VERSION "1.16.0"
#define SUNDOWN_VER_MAJOR 1
#define SUNDOWN_VER_MINOR 16
#define SUNDOWN_VER_REVISION 0
/********************
* TYPE DEFINITIONS *
********************/
/* mkd_autolink - type of autolink */
enum mkd_autolink {
MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink*/
MKDA_NORMAL, /* normal http/http/ftp/mailto/etc link */
MKDA_EMAIL, /* e-mail link without explit mailto: */
};
enum mkd_tableflags {
MKD_TABLE_ALIGN_L = 1,
MKD_TABLE_ALIGN_R = 2,
MKD_TABLE_ALIGN_CENTER = 3,
MKD_TABLE_ALIGNMASK = 3,
MKD_TABLE_HEADER = 4
};
enum mkd_extensions {
MKDEXT_NO_INTRA_EMPHASIS = (1 << 0),
MKDEXT_TABLES = (1 << 1),
MKDEXT_FENCED_CODE = (1 << 2),
MKDEXT_AUTOLINK = (1 << 3),
MKDEXT_STRIKETHROUGH = (1 << 4),
MKDEXT_SPACE_HEADERS = (1 << 6),
MKDEXT_SUPERSCRIPT = (1 << 7),
MKDEXT_LAX_SPACING = (1 << 8),
};
/* sd_callbacks - functions for rendering parsed data */
struct sd_callbacks {
/* block level callbacks - NULL skips the block */
void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque);
void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque);
void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque);
void (*hrule)(struct buf *ob, void *opaque);
void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque);
void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque);
void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque);
void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque);
/* span level callbacks - NULL or return 0 prints the span verbatim */
int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque);
int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque);
int (*linebreak)(struct buf *ob, void *opaque);
int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque);
int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque);
int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);
/* low level callbacks - NULL copies input directly into the output */
void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);
/* header and footer */
void (*doc_header)(struct buf *ob, void *opaque);
void (*doc_footer)(struct buf *ob, void *opaque);
};
struct sd_markdown;
/*********
* FLAGS *
*********/
/* list/listitem flags */
#define MKD_LIST_ORDERED 1
#define MKD_LI_BLOCK 2 /* <li> containing block data */
/**********************
* EXPORTED FUNCTIONS *
**********************/
extern struct sd_markdown *
sd_markdown_new(
unsigned int extensions,
size_t max_nesting,
const struct sd_callbacks *callbacks,
void *opaque);
extern void
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md);
extern void
sd_markdown_free(struct sd_markdown *md);
extern void
sd_version(int *major, int *minor, int *revision);
#ifdef __cplusplus
}
#endif
#endif
/* vim: set filetype=c: */

View File

@ -1,81 +0,0 @@
#include "stack.h"
#include <string.h>
int
stack_grow(struct stack *st, size_t new_size)
{
void **new_st;
if (st->asize >= new_size)
return 0;
new_st = realloc(st->item, new_size * sizeof(void *));
if (new_st == NULL)
return -1;
memset(new_st + st->asize, 0x0,
(new_size - st->asize) * sizeof(void *));
st->item = new_st;
st->asize = new_size;
if (st->size > new_size)
st->size = new_size;
return 0;
}
void
stack_free(struct stack *st)
{
if (!st)
return;
free(st->item);
st->item = NULL;
st->size = 0;
st->asize = 0;
}
int
stack_init(struct stack *st, size_t initial_size)
{
st->item = NULL;
st->size = 0;
st->asize = 0;
if (!initial_size)
initial_size = 8;
return stack_grow(st, initial_size);
}
void *
stack_pop(struct stack *st)
{
if (!st->size)
return NULL;
return st->item[--st->size];
}
int
stack_push(struct stack *st, void *item)
{
if (stack_grow(st, st->size * 2) < 0)
return -1;
st->item[st->size++] = item;
return 0;
}
void *
stack_top(struct stack *st)
{
if (!st->size)
return NULL;
return st->item[st->size - 1];
}

View File

@ -1,29 +0,0 @@
#ifndef STACK_H__
#define STACK_H__
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
struct stack {
void **item;
size_t size;
size_t asize;
};
void stack_free(struct stack *);
int stack_grow(struct stack *, size_t);
int stack_init(struct stack *, size_t);
int stack_push(struct stack *, void *);
void *stack_pop(struct stack *);
void *stack_top(struct stack *);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,336 +0,0 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "sundown/markdown.h"
#include "sundown/buffer.h"
#include "wrapper.h"
#define PROCESS_SPAN(method_name, ...) {\
struct renderopt *opt = opaque;\
PyObject *ret = PyObject_CallMethodObjArgs(\
(PyObject *) opt->self, PyUnicode_FromString(method_name),\
__VA_ARGS__);\
if (ret == NULL || ret == Py_None) {\
PyObject *r_ex = PyErr_Occurred();\
if (r_ex != NULL)\
PyErr_Print();\
return 0;\
}\
if (PyUnicode_Check(ret)) {\
PyObject *byte_string = PyUnicode_AsEncodedString(ret, "utf-8", "strict");\
bufputs(ob, PyBytes_AsString(byte_string));\
} else {\
bufputs(ob, PyBytes_AsString(ret));\
}\
return 1;\
}
#define PROCESS_BLOCK(method_name, ...) {\
struct renderopt *opt = opaque;\
PyObject *ret = PyObject_CallMethodObjArgs(\
(PyObject *) opt->self, PyUnicode_FromString(method_name),\
__VA_ARGS__);\
if (ret == NULL || ret == Py_None) {\
PyObject *r_ex = PyErr_Occurred();\
if (r_ex != NULL)\
PyErr_Print();\
return;\
}\
if (PyUnicode_Check(ret)) {\
PyObject *byte_string = PyUnicode_AsEncodedString(ret, "utf-8", "strict");\
bufputs(ob, PyBytes_AsString(byte_string));\
} else {\
bufputs(ob, PyBytes_AsString(ret));\
}\
}
#define PY_STR(b) (b != NULL ? PyUnicode_FromStringAndSize((const char *) b->data, (int) b->size) : Py_None)
#if PY_MAJOR_VERSION >= 3
#define PY_INT(i) PyLong_FromLong(i)
#else
#define PY_INT(i) PyInt_FromLong(i)
#endif
/* Block level
----------- */
static void
rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
{
PROCESS_BLOCK("block_code", PY_STR(text), PY_STR(lang), NULL);
}
static void
rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_BLOCK("block_quote", PY_STR(text), NULL);
}
static void
rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_BLOCK("block_html", PY_STR(text), NULL);
}
static void
rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
{
PROCESS_BLOCK("header", PY_STR(text), PY_INT(level), NULL);
}
static void
rndr_hrule(struct buf *ob, void *opaque)
{
PROCESS_BLOCK("hrule", NULL);
}
static void
rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
{
PyObject *is_ordered = Py_False;
if (flags & MKD_LIST_ORDERED) {
is_ordered = Py_True;
}
PROCESS_BLOCK("list", PY_STR(text), is_ordered, NULL);
}
static void
rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
{
PyObject *is_ordered = Py_False;
if (flags & MKD_LIST_ORDERED) {
is_ordered = Py_True;
}
PROCESS_BLOCK("list_item", PY_STR(text), is_ordered, NULL);
}
static void
rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_BLOCK("paragraph", PY_STR(text), NULL);
}
static void
rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
{
PROCESS_BLOCK("table", PY_STR(header), PY_STR(body), NULL);
}
static void
rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_BLOCK("table_row", PY_STR(text), NULL);
}
static void
rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque)
{
PROCESS_BLOCK("table_cell", PY_STR(text), PY_INT(flags), NULL);
}
/* Span level
---------- */
static int
rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
{
PyObject *is_email = Py_False;
if (type == MKDA_EMAIL) {
is_email = Py_True;
}
PROCESS_SPAN("autolink", PY_STR(link), is_email, NULL);
}
static int
rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("codespan", PY_STR(text), NULL);
}
static int
rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("double_emphasis", PY_STR(text), NULL);
}
static int
rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("emphasis", PY_STR(text), NULL);
}
static int
rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
{
PROCESS_SPAN("image", PY_STR(link), PY_STR(title), PY_STR(alt), NULL);
}
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
PROCESS_SPAN("linebreak", NULL);
}
static int
rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
{
PROCESS_SPAN("link", PY_STR(link), PY_STR(title), PY_STR(content), NULL);
}
static int
rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("raw_html", PY_STR(text), NULL);
}
static int
rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("triple_emphasis", PY_STR(text), NULL);
}
static int
rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("strikethrough", PY_STR(text), NULL);
}
static int
rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_SPAN("superscript", PY_STR(text), NULL);
}
/* Direct writes
------------- */
static void
rndr_entity(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_BLOCK("entity", PY_STR(text), NULL);
}
static void
rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
{
PROCESS_BLOCK("normal_text", PY_STR(text), NULL);
}
static void
rndr_doc_header(struct buf *ob, void *opaque)
{
PROCESS_BLOCK("doc_header", NULL);
}
static void
rndr_doc_footer(struct buf *ob, void *opaque)
{
PROCESS_BLOCK("doc_footer", NULL);
}
struct sd_callbacks callback_funcs = {
rndr_blockcode,
rndr_blockquote,
rndr_raw_block,
rndr_header,
rndr_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
rndr_table,
rndr_tablerow,
rndr_tablecell,
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
rndr_image,
rndr_linebreak,
rndr_link,
rndr_raw_html,
rndr_triple_emphasis,
rndr_strikethrough,
rndr_superscript,
rndr_entity,
rndr_normal_text,
rndr_doc_header,
rndr_doc_footer,
};
const char *method_names[] = {
"block_code",
"block_quote",
"block_html",
"header",
"hrule",
"list",
"list_item",
"paragraph",
"table",
"table_row",
"table_cell",
"autolink",
"codespan",
"double_emphasis",
"emphasis",
"image",
"linebreak",
"link",
"raw_html",
"triple_emphasis",
"strikethrough",
"superscript",
"entity",
"normal_text",
"doc_header",
"doc_footer"
};
const size_t method_count = sizeof(
method_names)/sizeof(char *);

View File

@ -1,15 +0,0 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "sundown/html.h"
struct renderopt {
struct html_renderopt html;
void *self;
};
extern struct sd_callbacks callback_funcs;
extern const char *method_names[];
extern const size_t method_count;

View File

@ -1,16 +0,0 @@
from sundown cimport buf, html_renderopt, mkd_autolink, sd_callbacks
cdef extern from *:
ctypedef char* const_char_ptr "const char *"
ctypedef char* const_size_t "const size_t"
cdef extern from 'wrapper.h':
struct renderopt:
html_renderopt html
void *self
sd_callbacks callback_funcs
const_char_ptr method_names[]
const_size_t method_count

1
vendor/sundown vendored

@ -1 +0,0 @@
Subproject commit 37728fb2d7137ff7c37d0a474cb827a8d6d846d8