# HG changeset patch # User Gregory Szorc # Date 1522971105 25200 # Node ID 65250a66b55cef968771b3fe23358998973f17a8 # Parent 0596d27457c607da2bf818aff786048a4d8af330 revlog: move censor logic into main revlog class Previously, the revlog class implemented dummy methods for various censor-related functionality. Revision censoring was (and will continue to be) only possible on filelog instances. So filelog implemented these methods to perform something reasonable. A problem with implementing censoring on filelog is that it assumes filelog is a revlog. Upcoming work to formalize the filelog interface will make this not true. Furthermore, the censoring logic is security-sensitive. I think action-at-a-distance with custom implementation of core revlog APIs in derived classes is a bit dangerous. I think at a minimum the censor logic should live in revlog.py. I was tempted to created a "censored revlog" class that basically pulled these methods out of filelog. But, I wasn't a huge fan of overriding core methods in child classes. A reason to do that would be performance. However, the censoring code only comes into play when: * hash verification fails * delta generation * applying deltas from changegroups The new code is conditional on an instance attribute. So the overhead for running the censored code when the revlog isn't censorable is an attribute lookup. All of these operations are at least a magnitude slower than a Python attribute lookup. So there shouldn't be a performance concern. Differential Revision: https://phab.mercurial-scm.org/D3151 diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -7,27 +7,20 @@ from __future__ import absolute_import -import struct - from .thirdparty.zope import ( interface as zi, ) from . import ( - error, - mdiff, repository, revlog, ) -def _censoredtext(text): - m, offs = revlog.parsemeta(text) - return m and "censored" in m - @zi.implementer(repository.ifilestorage) class filelog(revlog.revlog): def __init__(self, opener, path): super(filelog, self).__init__(opener, - "/".join(("data", path + ".i"))) + "/".join(("data", path + ".i")), + censorable=True) # full name of the user visible file, relative to the repository root self.filename = path @@ -90,35 +83,3 @@ return t2 != text return True - - def checkhash(self, text, node, p1=None, p2=None, rev=None): - try: - super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev) - except error.RevlogError: - if _censoredtext(text): - raise error.CensoredNodeError(self.indexfile, node, text) - raise - - def iscensored(self, rev): - """Check if a file revision is censored.""" - return self.flags(rev) & revlog.REVIDX_ISCENSORED - - def _peek_iscensored(self, baserev, delta, flush): - """Quickly check if a delta produces a censored revision.""" - # Fragile heuristic: unless new file meta keys are added alphabetically - # preceding "censored", all censored revisions are prefixed by - # "\1\ncensored:". A delta producing such a censored revision must be a - # full-replacement delta, so we inspect the first and only patch in the - # delta for this prefix. - hlen = struct.calcsize(">lll") - if len(delta) <= hlen: - return False - - oldlen = self.rawsize(baserev) - newlen = len(delta) - hlen - if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): - return False - - add = "\1\ncensored:" - addlen = len(add) - return newlen >= addlen and delta[hlen:hlen + addlen] == add diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -117,6 +117,10 @@ metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys) return "\1\n%s\1\n%s" % (metatext, text) +def _censoredtext(text): + m, offs = parsemeta(text) + return m and "censored" in m + def addflagprocessor(flag, processor): """Register a flag processor on a revision data flag. @@ -574,9 +578,11 @@ If mmaplargeindex is True, and an mmapindexthreshold is set, the index will be mmapped rather than read if it is larger than the configured threshold. + + If censorable is True, the revlog can have censored revisions. """ def __init__(self, opener, indexfile, datafile=None, checkambig=False, - mmaplargeindex=False): + mmaplargeindex=False, censorable=False): """ create a revlog object @@ -589,6 +595,7 @@ # When True, indexfile is opened with checkambig=True at writing, to # avoid file stat ambiguity. self._checkambig = checkambig + self._censorable = censorable # 3-tuple of (node, rev, text) for a raw revision. self._cache = None # Maps rev to chain base rev. @@ -1867,14 +1874,19 @@ Available as a function so that subclasses can extend hash mismatch behaviors as needed. """ - if p1 is None and p2 is None: - p1, p2 = self.parents(node) - if node != self.hash(text, p1, p2): - revornode = rev - if revornode is None: - revornode = templatefilters.short(hex(node)) - raise RevlogError(_("integrity check failed on %s:%s") - % (self.indexfile, pycompat.bytestr(revornode))) + try: + if p1 is None and p2 is None: + p1, p2 = self.parents(node) + if node != self.hash(text, p1, p2): + revornode = rev + if revornode is None: + revornode = templatefilters.short(hex(node)) + raise RevlogError(_("integrity check failed on %s:%s") + % (self.indexfile, pycompat.bytestr(revornode))) + except RevlogError: + if self._censorable and _censoredtext(text): + raise error.CensoredNodeError(self.indexfile, node, text) + raise def _enforceinlinesize(self, tr, fp=None): """Check if the revlog is too big for inline and convert if so. @@ -2300,11 +2312,33 @@ def iscensored(self, rev): """Check if a file revision is censored.""" - return False + if not self._censorable: + return False + + return self.flags(rev) & REVIDX_ISCENSORED def _peek_iscensored(self, baserev, delta, flush): """Quickly check if a delta produces a censored revision.""" - return False + if not self._censorable: + return False + + # Fragile heuristic: unless new file meta keys are added alphabetically + # preceding "censored", all censored revisions are prefixed by + # "\1\ncensored:". A delta producing such a censored revision must be a + # full-replacement delta, so we inspect the first and only patch in the + # delta for this prefix. + hlen = struct.calcsize(">lll") + if len(delta) <= hlen: + return False + + oldlen = self.rawsize(baserev) + newlen = len(delta) - hlen + if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): + return False + + add = "\1\ncensored:" + addlen = len(add) + return newlen >= addlen and delta[hlen:hlen + addlen] == add def getstrippoint(self, minlink): """find the minimum rev that must be stripped to strip the linkrev