track git sha for files of interest
we will use this in a future commit to avoid re-reading files that haven't changed
This commit is contained in:
parent
f0e635838f
commit
2898942696
@ -0,0 +1,22 @@
|
|||||||
|
"""track file hash
|
||||||
|
|
||||||
|
Revision ID: 22e0aa22ab8e
|
||||||
|
Revises: 1fb08a62dd91
|
||||||
|
Create Date: 2014-11-13 00:32:24.909035
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '22e0aa22ab8e'
|
||||||
|
down_revision = '1fb08a62dd91'
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Apply this revision: add a string ``sha`` column to the ``file`` table."""
    sha_column = sa.Column('sha', sa.String)
    op.add_column('file', sha_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Revert this revision: drop the ``sha`` column from the ``file`` table."""
    table, column = 'file', 'sha'
    op.drop_column(table, column)
|
@ -21,6 +21,7 @@ class File(Base):
|
|||||||
project_id = Column(Integer, ForeignKey('project.id'))
|
project_id = Column(Integer, ForeignKey('project.id'))
|
||||||
name = Column(String, nullable=False)
|
name = Column(String, nullable=False)
|
||||||
path = Column(String)
|
path = Column(String)
|
||||||
|
sha = Column(String)
|
||||||
lines = relationship('Line',
|
lines = relationship('Line',
|
||||||
backref='file',
|
backref='file',
|
||||||
cascade="all, delete, delete-orphan")
|
cascade="all, delete, delete-orphan")
|
||||||
|
@ -26,15 +26,23 @@ def discover(repo_root, organizations):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _find_files_in_project(path):
|
def _find_files_in_project(path):
|
||||||
"""Return a list of the files managed in the project.
|
"""Yield a (filename, sha) pair for each file managed in the project.
|
||||||
|
|
||||||
Uses 'git ls-files'
|
Uses 'git ls-files -s'
|
||||||
"""
|
"""
|
||||||
with utils.working_dir(path):
|
with utils.working_dir(path):
|
||||||
cmd = subprocess.Popen(['git', 'ls-files', '-z'],
|
# Ask git to tell us the sha hash so we can tell if the file
|
||||||
|
# has changed since we looked at it last.
|
||||||
|
cmd = subprocess.Popen(['git', 'ls-files', '-z', '-s'],
|
||||||
stdout=subprocess.PIPE)
|
stdout=subprocess.PIPE)
|
||||||
output = cmd.communicate()[0]
|
output = cmd.communicate()[0]
|
||||||
return output.split('\0')
|
entries = output.split('\0')
|
||||||
|
for e in entries:
|
||||||
|
if not e:
|
||||||
|
continue
|
||||||
|
metadata, ignore, filename = e.partition('\t')
|
||||||
|
sha = metadata.split(' ')[1]
|
||||||
|
yield (filename, sha)
|
||||||
|
|
||||||
|
|
||||||
class ProjectManager(object):
|
class ProjectManager(object):
|
||||||
@ -104,6 +112,13 @@ class ProjectManager(object):
|
|||||||
def _update_project_files(self, proj_obj):
|
def _update_project_files(self, proj_obj):
|
||||||
"""Update the files stored for each project"""
|
"""Update the files stored for each project"""
|
||||||
LOG.debug('reading file contents in %s', proj_obj.name)
|
LOG.debug('reading file contents in %s', proj_obj.name)
|
||||||
|
|
||||||
|
# FIXME: Need to be smarter about updating files here. We have
|
||||||
|
# the full file contents, so we could compute a hash to see if
|
||||||
|
# the file has changed. Then we only have to delete data for
|
||||||
|
# the files that have changed, and re-read those, rather than
|
||||||
|
# reloading all of the files.
|
||||||
|
|
||||||
# Delete any existing files in case the list of files being
|
# Delete any existing files in case the list of files being
|
||||||
# managed has changed. This is naive, and we can do better, but as a
|
# managed has changed. This is naive, and we can do better, but as a
|
||||||
# first version it's OK.
|
# first version it's OK.
|
||||||
@ -113,11 +128,11 @@ class ProjectManager(object):
|
|||||||
query.delete()
|
query.delete()
|
||||||
|
|
||||||
# Now load the files currently being managed by git.
|
# Now load the files currently being managed by git.
|
||||||
for filename in _find_files_in_project(proj_obj.path):
|
for filename, sha in _find_files_in_project(proj_obj.path):
|
||||||
fullname = os.path.join(proj_obj.path, filename)
|
fullname = os.path.join(proj_obj.path, filename)
|
||||||
if not os.path.isfile(fullname):
|
if not os.path.isfile(fullname):
|
||||||
continue
|
continue
|
||||||
new_file = File(project=proj_obj, name=filename, path=fullname)
|
new_file = File(project=proj_obj, name=filename, path=fullname, sha=sha)
|
||||||
self.session.add(new_file)
|
self.session.add(new_file)
|
||||||
if any(fnmatch.fnmatch(filename, dnr) for dnr in self._DO_NOT_READ):
|
if any(fnmatch.fnmatch(filename, dnr) for dnr in self._DO_NOT_READ):
|
||||||
LOG.debug('ignoring contents of %s', fullname)
|
LOG.debug('ignoring contents of %s', fullname)
|
||||||
|
Loading…
Reference in New Issue
Block a user