Memory efficient loading of RawData in 0003 Migration

This commit is contained in:
Andrew Melton
2013-08-09 11:04:03 -04:00
parent 29db639f30
commit c51b1b39aa

View File

@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import copy
import gc
from south.v2 import DataMigration
from stacktach.notification import Notification
from stacktach.views import NOTIFICATIONS
@@ -29,6 +30,27 @@ USAGE_EVENTS_EXCEPT_EXISTS = copy.deepcopy(USAGE_EVENTS)
USAGE_EVENTS_EXCEPT_EXISTS.remove('compute.instance.exists')
def queryset_iterator(queryset, chunksize=1000):
    """Yield the rows of a Django queryset in primary-key order, in chunks.

    At most ``chunksize`` rows (default: 1000) are fetched per query, so the
    whole result set is never held in memory at once.  After every chunk the
    garbage collector is run explicitly.

    The rows must be subscriptable by ``'id'`` (for example the dicts
    produced by ``queryset.values('id', ...)``), because the last ``id``
    seen is used as the cursor for the next chunk.

    Any ordering already applied to ``queryset`` is not supported: it is
    replaced by an ordering on ``id``.

    :param queryset: queryset whose rows expose an ``'id'`` key.
    :param chunksize: maximum number of rows fetched per query.
    :returns: a generator over the rows, ordered by ascending ``id``.
    """
    try:
        last_pk = queryset.order_by('-id')[0]['id']
    except IndexError:
        # Empty queryset: there is nothing to iterate over.
        return

    # The cursor starts at 0, so only rows with id > 0 are visited.
    last_seen_pk = 0
    ordered = queryset.order_by('id')
    while last_seen_pk < last_pk:
        fetched_any = False
        for row in ordered.filter(id__gt=last_seen_pk)[:chunksize]:
            fetched_any = True
            last_seen_pk = row['id']
            yield row
        # Release the chunk that was just consumed before fetching the next.
        gc.collect()
        if not fetched_any:
            # The row that supplied last_pk is no longer returned (e.g. it
            # was deleted meanwhile); stop instead of looping forever.
            break
class Migration(DataMigration):
def _find_latest_usage_related_raw_id_for_request_id(self, orm, request_id):
@@ -52,7 +74,7 @@ class Migration(DataMigration):
# and orm['appname.ModelName'] for models in other applications.
print "Started inserting records in RawDataImageMeta"
rawdata_all = orm.RawData.objects.filter(event__in=USAGE_EVENTS).values('json', 'id')
for rawdata in rawdata_all:
for rawdata in queryset_iterator(rawdata_all):
notification = self._notification(rawdata['json'])
orm.RawDataImageMeta.objects.create(
raw_id=rawdata['id'],