view mercurial/revset.py @ 24576:582cfcc843c7

revset: add the 'subrepo' symbol This returns the csets where matching subrepos have changed with respect to the containing repo's first parent. The second parent shouldn't matter, because it is either syncing up to the first parent (i.e. it hasn't changed from the current branch's POV), or the merge changed it with respect to the first parent (which already adds it to the set). There's already a 'subrepo' fileset, but it is prefixed with 'set:', so there should be no ambiguity (in code anyway). The only test I see for it is to revert subrepos named by a glob pattern (in test-subrepo.t, line 58). Since it doesn't return a tracked file, neither 'log "set:subrepo()"' nor 'files "set:subrepo()"' print anything. Therefore, it seems useful to have a revset that will return something for log (and can be added to a revsetalias to be chained with 'file' revsets.) It might be nice to be able to filter for added, modified and removed separately, but add/remove should be rare. It might also be nice to be able to do a 'contains' check, in addition to this mutated check. Maybe it is possible to get those with the existing 'adds', 'contains', 'modifies' and 'removes' by teaching them to chase explicit paths into subrepos. I'm not sure if this should be added to the 'modifies adds removes' line in revset.optimize() (since it is doing an AMR check on .hgsubstate), or if it is OK to put into 'safesymbols' (things like 'file' are on the list, and that takes a regex, among other patterns).
author Matt Harbison <matt_harbison@yahoo.com>
date Wed, 25 Mar 2015 14:56:54 -0400 (2015-03-25)
parents 0e41f110e69e
children c5022f3579b9
line wrap: on
line source
# revset.py - revision set queries for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import re
import parser, util, error, discovery, hbisect, phases
import node
import heapq
import match as matchmod
from i18n import _
import encoding
import obsolete as obsmod
import pathutil
import repoview

def _revancestors(repo, revs, followfirst):
    """Like revlog.ancestors(), but supports followfirst."""
    if followfirst:
        cut = 1
    else:
        cut = None
    cl = repo.changelog

    def iterate():
        revqueue, revsnode = None, None
        h = []

        revs.sort(reverse=True)
        revqueue = util.deque(revs)
        if revqueue:
            revsnode = revqueue.popleft()
            heapq.heappush(h, -revsnode)

        seen = set()
        while h:
            current = -heapq.heappop(h)
            if current not in seen:
                if revsnode and current == revsnode:
                    if revqueue:
                        revsnode = revqueue.popleft()
                        heapq.heappush(h, -revsnode)
                seen.add(current)
                yield current
                for parent in cl.parentrevs(current)[:cut]:
                    if parent != node.nullrev:
                        heapq.heappush(h, -parent)

    return generatorset(iterate(), iterasc=False)

def _revdescendants(repo, revs, followfirst):
    """Like revlog.descendants() but supports followfirst."""
    if followfirst:
        cut = 1
    else:
        cut = None

    def iterate():
        cl = repo.changelog
        first = min(revs)
        nullrev = node.nullrev
        if first == nullrev:
            # Are there nodes with a null first parent and a non-null
            # second one? Maybe. Do we care? Probably not.
            for i in cl:
                yield i
        else:
            seen = set(revs)
            for i in cl.revs(first + 1):
                for x in cl.parentrevs(i)[:cut]:
                    if x != nullrev and x in seen:
                        seen.add(i)
                        yield i
                        break

    return generatorset(iterate(), iterasc=True)

def _revsbetween(repo, roots, heads):
    """Return all paths between roots and heads, inclusive of both endpoint
    sets."""
    if not roots:
        return baseset()
    parentrevs = repo.changelog.parentrevs
    visit = list(heads)
    reachable = set()
    seen = {}
    minroot = min(roots)
    roots = set(roots)
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = visit.pop()
        if rev in roots:
            reachable.add(rev)
        parents = parentrevs(rev)
        seen[rev] = parents
        for parent in parents:
            if parent >= minroot and parent not in seen:
                visit.append(parent)
    if not reachable:
        return baseset()
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reachable.add(rev)
    return baseset(sorted(reachable))

elements = {
    "(": (21, ("group", 1, ")"), ("func", 1, ")")),
    "##": (20, None, ("_concat", 20)),
    "~": (18, None, ("ancestor", 18)),
    "^": (18, None, ("parent", 18), ("parentpost", 18)),
    "-": (5, ("negate", 19), ("minus", 5)),
    "::": (17, ("dagrangepre", 17), ("dagrange", 17),
           ("dagrangepost", 17)),
    "..": (17, ("dagrangepre", 17), ("dagrange", 17),
           ("dagrangepost", 17)),
    ":": (15, ("rangepre", 15), ("range", 15), ("rangepost", 15)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "%": (5, None, ("only", 5), ("onlypost", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

keywords = set(['and', 'or', 'not'])

# default set of valid characters for the initial letter of symbols
_syminitletters = set(c for c in [chr(i) for i in xrange(256)]
                      if c.isalnum() or c in '._@' or ord(c) > 127)

# default set of valid characters for non-initial letters of symbols
_symletters = set(c for c in  [chr(i) for i in xrange(256)]
                  if c.isalnum() or c in '-._/@' or ord(c) > 127)

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize("@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in "():,-|&+!~^%": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # some jerk gave us foo-bar-baz, try to check if it's a symbol
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        yield ('-', None, pos)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)

def parseerrordetail(inst):
    """Compose error message from specified ParseError object
    """
    if len(inst.args) > 1:
        return _('at %s: %s') % (inst.args[1], inst.args[0])
    else:
        return inst.args[0]

# helpers

def getstring(x, err):
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)

def getlist(x):
    if not x:
        return []
    if x[0] == 'list':
        return getlist(x[1]) + [x[2]]
    return [x]

def getargs(x, min, max, err):
    l = getlist(x)
    if len(l) < min or (max >= 0 and len(l) > max):
        raise error.ParseError(err)
    return l

def isvalidsymbol(tree):
    """Examine whether specified ``tree`` is valid ``symbol`` or not
    """
    return tree[0] == 'symbol' and len(tree) > 1

def getsymbol(tree):
    """Get symbol name from valid ``symbol`` in ``tree``

    This assumes that ``tree`` is already examined by ``isvalidsymbol``.
    """
    return tree[1]

def isvalidfunc(tree):
    """Examine whether specified ``tree`` is valid ``func`` or not
    """
    return tree[0] == 'func' and len(tree) > 1 and isvalidsymbol(tree[1])

def getfuncname(tree):
    """Get function name from valid ``func`` in ``tree``

    This assumes that ``tree`` is already examined by ``isvalidfunc``.
    """
    return getsymbol(tree[1])

def getfuncargs(tree):
    """Get list of function arguments from valid ``func`` in ``tree``

    This assumes that ``tree`` is already examined by ``isvalidfunc``.
    """
    if len(tree) > 2:
        return getlist(tree[2])
    else:
        return []

def getset(repo, subset, x):
    if not x:
        raise error.ParseError(_("missing argument"))
    s = methods[x[0]](repo, subset, *x[1:])
    if util.safehasattr(s, 'isascending'):
        return s
    return baseset(s)

def _getrevsource(repo, r):
    extra = repo[r].extra()
    for label in ('source', 'transplant_source', 'rebase_source'):
        if label in extra:
            try:
                return repo[extra[label]].rev()
            except error.RepoLookupError:
                pass
    return None

# operator methods

def stringset(repo, subset, x):
    x = repo[x].rev()
    if x in subset:
        return baseset([x])
    return baseset()

def symbolset(repo, subset, x):
    if x in symbols:
        raise error.ParseError(_("can't use %s here") % x)
    return stringset(repo, subset, x)

def rangeset(repo, subset, x, y):
    m = getset(repo, fullreposet(repo), x)
    n = getset(repo, fullreposet(repo), y)

    if not m or not n:
        return baseset()
    m, n = m.first(), n.last()

    if m < n:
        r = spanset(repo, m, n + 1)
    else:
        r = spanset(repo, m, n - 1)
    return r & subset

def dagrange(repo, subset, x, y):
    r = fullreposet(repo)
    xs = _revsbetween(repo, getset(repo, r, x), getset(repo, r, y))
    return xs & subset

def andset(repo, subset, x, y):
    return getset(repo, getset(repo, subset, x), y)

def orset(repo, subset, x, y):
    xl = getset(repo, subset, x)
    yl = getset(repo, subset - xl, y)
    return xl + yl

def notset(repo, subset, x):
    return subset - getset(repo, subset, x)

def listset(repo, subset, a, b):
    raise error.ParseError(_("can't use a list in this context"))

def func(repo, subset, a, b):
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](repo, subset, b)
    raise error.UnknownIdentifier(a[1], symbols.keys())

# functions

def adds(repo, subset, x):
    """``adds(pattern)``
    Changesets that add a file matching pattern.

    The pattern without explicit kind like ``glob:`` is expected to be
    relative to the current directory and match against a file or a
    directory.
    """
    # i18n: "adds" is a keyword
    pat = getstring(x, _("adds requires a pattern"))
    return checkstatus(repo, subset, pat, 1)

def ancestor(repo, subset, x):
    """``ancestor(*changeset)``
    A greatest common ancestor of the changesets.

    Accepts 0 or more changesets.
    Will return empty list when passed no args.
    Greatest common ancestor of a single changeset is that changeset.
    """
    # i18n: "ancestor" is a keyword
    l = getlist(x)
    rl = fullreposet(repo)
    anc = None

    # (getset(repo, rl, i) for i in l) generates a list of lists
    for revs in (getset(repo, rl, i) for i in l):
        for r in revs:
            if anc is None:
                anc = repo[r]
            else:
                anc = anc.ancestor(repo[r])

    if anc is not None and anc.rev() in subset:
        return baseset([anc.rev()])
    return baseset()

def _ancestors(repo, subset, x, followfirst=False):
    heads = getset(repo, fullreposet(repo), x)
    if not heads:
        return baseset()
    s = _revancestors(repo, heads, followfirst)
    return subset & s

def ancestors(repo, subset, x):
    """``ancestors(set)``
    Changesets that are ancestors of a changeset in set.
    """
    return _ancestors(repo, subset, x)

def _firstancestors(repo, subset, x):
    # ``_firstancestors(set)``
    # Like ``ancestors(set)`` but follows only the first parents.
    return _ancestors(repo, subset, x, followfirst=True)

def ancestorspec(repo, subset, x, n):
    """``set~n``
    Changesets that are the Nth ancestor (first parents only) of a changeset
    in set.
    """
    try:
        n = int(n[1])
    except (TypeError, ValueError):
        raise error.ParseError(_("~ expects a number"))
    ps = set()
    cl = repo.changelog
    for r in getset(repo, fullreposet(repo), x):
        for i in range(n):
            r = cl.parentrevs(r)[0]
        ps.add(r)
    return subset & ps

def author(repo, subset, x):
    """``author(string)``
    Alias for ``user(string)``.
    """
    # i18n: "author" is a keyword
    n = encoding.lower(getstring(x, _("author requires a string")))
    kind, pattern, matcher = _substringmatcher(n)
    return subset.filter(lambda x: matcher(encoding.lower(repo[x].user())))

def bisect(repo, subset, x):
    """``bisect(string)``
    Changesets marked in the specified bisect status:

    - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
    - ``goods``, ``bads``      : csets topologically good/bad
    - ``range``              : csets taking part in the bisection
    - ``pruned``             : csets that are goods, bads or skipped
    - ``untested``           : csets whose fate is yet unknown
    - ``ignored``            : csets ignored due to DAG topology
    - ``current``            : the cset currently being bisected
    """
    # i18n: "bisect" is a keyword
    status = getstring(x, _("bisect requires a string")).lower()
    state = set(hbisect.get(repo, status))
    return subset & state

# Backward-compatibility
# - no help entry so that we do not advertise it any more
def bisected(repo, subset, x):
    return bisect(repo, subset, x)

def bookmark(repo, subset, x):
    """``bookmark([name])``
    The named bookmark or all bookmarks.

    If `name` starts with `re:`, the remainder of the name is treated as
    a regular expression. To match a bookmark that actually starts with `re:`,
    use the prefix `literal:`.
    """
    # i18n: "bookmark" is a keyword
    args = getargs(x, 0, 1, _('bookmark takes one or no arguments'))
    if args:
        bm = getstring(args[0],
                       # i18n: "bookmark" is a keyword
                       _('the argument to bookmark must be a string'))
        kind, pattern, matcher = _stringmatcher(bm)
        bms = set()
        if kind == 'literal':
            bmrev = repo._bookmarks.get(pattern, None)
            if not bmrev:
                raise error.RepoLookupError(_("bookmark '%s' does not exist")
                                            % bm)
            bms.add(repo[bmrev].rev())
        else:
            matchrevs = set()
            for name, bmrev in repo._bookmarks.iteritems():
                if matcher(name):
                    matchrevs.add(bmrev)
            if not matchrevs:
                raise error.RepoLookupError(_("no bookmarks exist"
                                              " that match '%s'") % pattern)
            for bmrev in matchrevs:
                bms.add(repo[bmrev].rev())
    else:
        bms = set([repo[r].rev()
                   for r in repo._bookmarks.values()])
    bms -= set([node.nullrev])
    return subset & bms

def branch(repo, subset, x):
    """``branch(string or set)``
    All changesets belonging to the given branch or the branches of the given
    changesets.

    If `string` starts with `re:`, the remainder of the name is treated as
    a regular expression. To match a branch that actually starts with `re:`,
    use the prefix `literal:`.
    """
    getbi = repo.revbranchcache().branchinfo

    try:
        b = getstring(x, '')
    except error.ParseError:
        # not a string, but another revspec, e.g. tip()
        pass
    else:
        kind, pattern, matcher = _stringmatcher(b)
        if kind == 'literal':
            # note: falls through to the revspec case if no branch with
            # this name exists
            if pattern in repo.branchmap():
                return subset.filter(lambda r: matcher(getbi(r)[0]))
        else:
            return subset.filter(lambda r: matcher(getbi(r)[0]))

    s = getset(repo, fullreposet(repo), x)
    b = set()
    for r in s:
        b.add(getbi(r)[0])
    c = s.__contains__
    return subset.filter(lambda r: c(r) or getbi(r)[0] in b)

def bumped(repo, subset, x):
    """``bumped()``
    Mutable changesets marked as successors of public changesets.

    Only non-public and non-obsolete changesets can be `bumped`.
    """
    # i18n: "bumped" is a keyword
    getargs(x, 0, 0, _("bumped takes no arguments"))
    bumped = obsmod.getrevs(repo, 'bumped')
    return subset & bumped

def bundle(repo, subset, x):
    """``bundle()``
    Changesets in the bundle.

    Bundle must be specified by the -R option."""

    try:
        bundlerevs = repo.changelog.bundlerevs
    except AttributeError:
        raise util.Abort(_("no bundle provided - specify with -R"))
    return subset & bundlerevs

def checkstatus(repo, subset, pat, field):
    hasset = matchmod.patkind(pat) == 'set'

    mcache = [None]
    def matches(x):
        c = repo[x]
        if not mcache[0] or hasset:
            mcache[0] = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=c)
        m = mcache[0]
        fname = None
        if not m.anypats() and len(m.files()) == 1:
            fname = m.files()[0]
        if fname is not None:
            if fname not in c.files():
                return False
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                return False
        files = repo.status(c.p1().node(), c.node())[field]
        if fname is not None:
            if fname in files:
                return True
        else:
            for f in files:
                if m(f):
                    return True

    return subset.filter(matches)

def _children(repo, narrow, parentset):
    cs = set()
    if not parentset:
        return baseset(cs)
    pr = repo.changelog.parentrevs
    minrev = min(parentset)
    for r in narrow:
        if r <= minrev:
            continue
        for p in pr(r):
            if p in parentset:
                cs.add(r)
    return baseset(cs)

def children(repo, subset, x):
    """``children(set)``
    Child changesets of changesets in set.
    """
    s = getset(repo, fullreposet(repo), x)
    cs = _children(repo, subset, s)
    return subset & cs

def closed(repo, subset, x):
    """``closed()``
    Changeset is closed.
    """
    # i18n: "closed" is a keyword
    getargs(x, 0, 0, _("closed takes no arguments"))
    return subset.filter(lambda r: repo[r].closesbranch())

def contains(repo, subset, x):
    """``contains(pattern)``