view mercurial/transaction.py @ 13704:a464763e99f1

dirstate: avoid a race with multiple commits in the same process (issue2264, issue2516)

The race happens when two commits in a row change the same file without changing its size, *if* those two commits happen in the same second in the same process while holding the same repo lock. For example:

  commit 1:
    M a
    M b
  commit 2:    # same process, same second, same repo lock
    M b        # modify b without changing its size
    M c

This first manifested in transplant, which is the most common way to do multiple commits in the same process. But it can manifest in any script or extension that does multiple commits under the same repo lock. (Thus, the test script tests both transplant and a custom script.)

The problem was that dirstate.status() failed to notice the change to b when localrepo is about to do the second commit, meaning that change gets left in the working directory. In the context of transplant, that means either a crash ("RuntimeError: nothing committed after transplant") or a silently inaccurate transplant, depending on whether any other files were modified by the second transplanted changeset.

The fix is to make status() work a little harder when we have previously marked files as clean (state 'normal') in the same process. Specifically, dirstate.normal() adds files to self._lastnormal, and other state-changing methods remove them. Then dirstate.status() puts any files in self._lastnormal into state 'lookup', which will make localrepository.status() read file contents to see if it has really changed. So we pay a small performance penalty for the second (and subsequent) commits in the same process, without affecting the common case. Anything that does lots of status updates and checks in the same process could suffer a performance hit.

Incidentally, there is a simpler fix: call dirstate.normallookup() on every file updated by commit() at the end of the commit. The trouble with that solution is that it imposes a performance penalty on the common case: it means the next status-dependent hg command after every "hg commit" will be a little bit slower. The patch here is more complex, but only affects performance for the uncommon case.
author Greg Ward <greg@gerg.ca>
date Sun, 20 Mar 2011 17:41:09 -0400 (2011-03-20)
parents 19ad316e5be3
children f366d4c2ff34
line wrap: on
line source
# transaction.py - simple journalling scheme for mercurial
#
# This transaction scheme is intended to gracefully handle program
# errors and interruptions. More serious failures like system crashes
# can be recovered with an fsck-like tool. As the whole repository is
# effectively log-structured, this should amount to simply truncating
# anything that isn't referenced in the changelog.
#
# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import os, errno
import error, util

def active(func):
    '''Decorator for methods that need a transaction that is still open.

    The returned wrapper forwards the call to func, unchanged, while the
    instance's count attribute is non-zero. Once count has reached zero
    it raises error.Abort instead of calling func.
    '''
    def _active(self, *args, **kwds):
        if self.count != 0:
            return func(self, *args, **kwds)
        # count == 0: the transaction was already closed or aborted
        raise error.Abort(_(
            'cannot use transaction when it is already committed/aborted'))
    return _active

def _playback(journal, report, opener, entries, unlink=True):
    for f, o, ignore in entries:
        if o or not unlink:
            try:
                fp = opener(f, 'a')
                fp.truncate(o)
                fp.close()
            except IOError:
                report(_("failed to truncate %s\n") % f)
                raise
        else:
            try:
                fp = opener(f)
                fn = fp.name
                fp.close()
                util.unlink(fn)
            except (IOError, OSError), inst:
                if inst.errno != errno.ENOENT:
                    raise
    util.unlink(journal)

class transaction(object):
    def __init__(self, report, opener, journal, after=None, createmode=None):
        self.count = 1
        self.usages = 1
        self.report = report
        self.opener = opener
        self.after = after
        self.entries = []
        self.map = {}
        self.journal = journal
        self._queue = []

        self.file = util.posixfile(self.journal, "w")
        if createmode is not None:
            os.chmod(self.journal, createmode & 0666)

    def __del__(self):
        if self.journal:
            self._abort()

    @active
    def startgroup(self):
        self._queue.append([])

    @active
    def endgroup(self):
        q = self._queue.pop()
        d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in q])
        self.entries.extend(q)
        self.file.write(d)
        self.file.flush()

    @active
    def add(self, file, offset, data=None):
        if file in self.map:
            return
        if self._queue:
            self._queue[-1].append((file, offset, data))
            return

        self.entries.append((file, offset, data))
        self.map[file] = len(self.entries) - 1
        # add enough data to the journal to do the truncate
        self.file.write("%s\0%d\n" % (file, offset))
        self.file.flush()

    @active
    def find(self, file):
        if file in self.map:
            return self.entries[self.map[file]]
        return None

    @active
    def replace(self, file, offset, data=None):
        '''
        replace can only replace already committed entries
        that are not pending in the queue
        '''

        if file not in self.map:
            raise KeyError(file)
        index = self.map[file]
        self.entries[index] = (file, offset, data)
        self.file.write("%s\0%d\n" % (file, offset))
        self.file.flush()

    @active
    def nest(self):
        self.count += 1
        self.usages += 1
        return self

    def release(self):
        if self.count > 0:
            self.usages -= 1
        # if the transaction scopes are left without being closed, fail
        if self.count > 0 and self.usages == 0:
            self._abort()

    def running(self):
        return self.count > 0

    @active
    def close(self):
        '''commit the transaction'''
        self.count -= 1
        if self.count != 0:
            return
        self.file.close()
        self.entries = []
        if self.after:
            self.after()
        if os.path.isfile(self.journal):
            util.unlink(self.journal)
        self.journal = None

    @active
    def abort(self):
        '''abort the transaction (generally called on error, or when the
        transaction is not explicitly committed before going out of
        scope)'''
        self._abort()

    def _abort(self):
        self.count = 0
        self.usages = 0
        self.file.close()

        try:
            if not self.entries:
                if self.journal:
                    util.unlink(self.journal)
                return

            self.report(_("transaction abort!\n"))

            try:
                _playback(self.journal, self.report, self.opener,
                          self.entries, False)
                self.report(_("rollback completed\n"))
            except:
                self.report(_("rollback failed - please run hg recover\n"))
        finally:
            self.journal = None


def rollback(opener, file, report):
    '''Replay the journal stored in file.

    Each journal line has the form "<name>\\0<offset>". The lines are
    turned into (name, offset, None) entries and handed to _playback(),
    together with opener and report and with unlinking left enabled.
    '''
    journal = util.posixfile(file)
    records = journal.readlines()
    journal.close()

    entries = []
    for record in records:
        name, offset = record.split('\0')
        entries.append((name, int(offset), None))

    _playback(file, report, opener, entries)