diff --git a/HACKING b/HACKING index 2f364c894..f131e04a1 100644 --- a/HACKING +++ b/HACKING @@ -73,3 +73,43 @@ Docstrings :returns: description of the return value """ + +Text encoding +---------- +- All text within python code should be of type 'unicode'. + + WRONG: + + >>> s = 'foo' + >>> s + 'foo' + >>> type(s) + + + RIGHT: + + >>> u = u'foo' + >>> u + u'foo' + >>> type(u) + + +- Transitions between internal unicode and external strings should always + be immediately and explicitly encoded or decoded. + +- All external text that is not explicitly encoded (database storage, + commandline arguments, etc.) should be presumed to be encoded as utf-8. + + WRONG: + + mystring = infile.readline() + myreturnstring = do_some_magic_with(mystring) + outfile.write(myreturnstring) + + RIGHT: + + mystring = infile.readline() + mytext = s.decode('utf-8') + returntext = do_some_magic_with(mytext) + returnstring = returntext.encode('utf-8') + outfile.write(returnstring)