Intern unicode strings coming from external storage
According to my measurements, this saves ~0.5 GB of memory while loading the current dataset. I used Pympler to collect statistics on the most common object types; unicode was at the top (columns are: type, instance count, total memory used): unicode 8058199 894651428. With this change the numbers are: unicode 1312689 303504132, and unicode moved to 2nd place (after set). No other significant changes were noted. Change-Id: I6ae02824071c95e11636c6e4a2ec3699ec3c365f
This commit is contained in:
@@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
import collections
|
import collections
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
from oslo.config import cfg
|
from oslo.config import cfg
|
||||||
@@ -39,9 +40,28 @@ CompactRecord = collections.namedtuple('CompactRecord',
|
|||||||
RECORD_FIELDS_FOR_AGGREGATE)
|
RECORD_FIELDS_FOR_AGGREGATE)
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info[0] == 2:
    # Python 2 can only intern byte strings, so unicode values are
    # de-duplicated through this module-level table instead: it maps each
    # distinct unicode value to the first instance that was seen.  Entries
    # are never evicted.
    _unihash = {}

    def uniintern(o):
        """Return a shared instance of the string ``o``.

        Equal strings passed through this function end up referencing a
        single object, so duplicates can be garbage-collected.

        :param o: any value; only strings are de-duplicated
        :returns: the interned ``str``, the canonical ``unicode`` instance
                  from ``_unihash``, or ``o`` itself for anything else
        """
        # intern() accepts exact ``str`` objects only and raises TypeError
        # for subclass instances, hence the exact type check.
        if type(o) is str:
            return intern(o)
        if isinstance(o, unicode):
            return _unihash.setdefault(o, o)
        return o
else:
    def uniintern(o):
        """Return a shared instance of the string ``o``.

        Equal strings passed through this function end up referencing a
        single object, so duplicates can be garbage-collected.

        :param o: any value; only exact ``str`` objects are interned
        :returns: the interned string, or ``o`` itself for anything else
        """
        # sys.intern() accepts exact ``str`` objects only and raises
        # TypeError for subclass instances, hence the exact type check.
        if type(o) is str:
            return sys.intern(o)
        return o
|
||||||
|
|
||||||
|
|
||||||
def compact_records(records):
    """Lazily convert records into memory-friendly ``CompactRecord`` tuples.

    Only the fields named in ``RECORD_FIELDS_FOR_AGGREGATE`` are kept, and
    every kept value is passed through ``uniintern`` before being stored.

    :param records: iterable of objects exposing ``get(key)``
    :returns: generator yielding one ``CompactRecord`` per input record
    """
    for record in records:
        fields = {}
        for name in RECORD_FIELDS_FOR_AGGREGATE:
            # .get() is called without a default, so a field missing from
            # the record is stored as whatever .get() returns for it.
            fields[name] = uniintern(record.get(name))
        yield CompactRecord(**fields)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user