view query-languages.tex @ 354:30ec1de4d3e4

Added examples
author Martin Geisler <mg@aragost.com>
date Wed, 20 Jun 2012 17:38:27 +0200
parents 5292f85ffa49
children 4563d83d06e8
line wrap: on
line source

\documentclass[t,noamsthm,xcolor=dvipsnames]{beamer}
\input{preambel}
\title{Mercurial's Query Languages}
\author{Martin Geisler
  \texorpdfstring{\\$\langle$\texttt{mg@aragost.com}$\rangle$}
                 {<mg@aragost.com>}}
\date{CCDC, Cambridge\\June 25th--27th, 2012}

\begin{document}

\begin{frame}[plain]
  \maketitle
  \begin{tikzpicture}[overlay, remember picture]
    \node[above right] at (current page.south west)
         {\includegraphics[height=3cm]{pixelman-front}};
  \end{tikzpicture}
\end{frame}

\begin{frame}{Outline}
  \tableofcontents
\end{frame}

\section{Introduction}

\begin{frame}[fragile]{Confusing Histories}
  Big projects can give rise to a branchy history:
  \begin{itemize}
  \item several concurrent branches
  \item many developers pushing changes
  \end{itemize}

  \pause

  Mercurial helps you cut away the unnecessary fluff:
  \begin{itemize}
  \item<2-> Revision sets select revisions (Mercurial 1.6):
\begin{lstlisting}
$ hg log -r "branch('stable') and user('Martin')"
\end{lstlisting}
    Can be used in all places where Mercurial expects revisions

  \item<3-> File sets select files in revisions (Mercurial 1.9):
\begin{lstlisting}
$ hg revert "set:added() and size('>20MB')"
\end{lstlisting}
    Can be used in all places where Mercurial expects file names

  \end{itemize}
\end{frame}

\begin{frame}[fragile]{Flexibility}
  The query languages let you solve hard problems:
  \begin{itemize}[<+->]
  \item Imagine you have a dirty working copy:
\begin{lstlisting}
$ hg status
M index.html
A logo.png
\end{lstlisting}
 But how can you see the diff of \path{index.html} only?

\item Easy! You use your nifty Unix shell:
\begin{lstlisting}
$ hg diff $(hg status --no-status --modified)
\end{lstlisting}

\item With file sets you can do
\begin{lstlisting}
$ hg diff "set:modified()"
\end{lstlisting}
and it will work on all platforms

  \end{itemize}
\end{frame}

\begin{frame}[fragile]{Implementation}
  When a revision set is evaluated it is:
  \begin{description}[<+->]
  \item[tokenized:] split input into operators, symbols, strings
  \item[parsed:] build parse tree based on operator precedence
  \item[optimized:] reorder parse tree to evaluate cheap parts first:
\begin{lstlisting}
contains("README") and 1.0::1.5
\end{lstlisting}
starts with a manifest-based query --- reorder to:
\begin{lstlisting}
1.0::1.5 and contains("README")
\end{lstlisting}
  \item[executed:] go through tree and evaluate predicates
  \end{description}
\end{frame}

\begin{frame}[fragile]{Quoting}
  How to handle special characters:
  \begin{itemize}[<+->]
\item You will need to quote your queries on the command line:
\begin{lstlisting}
$ hg log -r parents()
zsh: parse error near `()'
\end{lstlisting}

\item Strings in queries can be in single or double quotes:
\begin{lstlisting}
$ hg log -r "user('Martin')"
\end{lstlisting}

\item Escape characters are supported:
\begin{lstlisting}
$ hg log -r "keyword('first line\nsecond line')"
\end{lstlisting}

\item Use a raw string to disable the escape characters:
\begin{lstlisting}
$ hg log -r "grep(r'Bug\s*\d+')"
\end{lstlisting}

  \end{itemize}
\end{frame}

\section{Revision Sets}

\subsection{Predicates}

\begin{frame}[fragile]{Predicates}
  Predicates select changesets for inclusion in the resulting set:
  \begin{itemize}
  \item \cmd{closed()}, \cmd{head()}, \cmd{merge()}: simple changeset properties

  \item \cmd{author(string)}, \cmd{date(interval)}: search by user
    name or by commit date

\begin{lstlisting}
$ hg log -r "author('Martin') and merge()"
\end{lstlisting}

  \item \cmd{grep(regex)}, \cmd{keyword(string)}: search in commit
    message, user name, changed file names for a regular expression or
    a substring

  %\item \cmd{bisected(string)}: changesets marked good/bad/skip while
  %  bisecting

  \end{itemize}
\end{frame}

\begin{frame}[fragile]{Matching by Files in Changesets}
  Matching by how a file changed:
  \begin{itemize}
  \item \cmd{adds(pattern)}: a file matching pattern was added
  \item \cmd{modifies(pattern)}: a file matching pattern was modified
  \item \cmd{removes(pattern)}: a file matching pattern was removed
  \item<2-> \cmd{file(pattern)}: combination of all the above
  \item<3-> \cmd{contains(pattern)}: a file matching pattern was present
  \end{itemize}
\end{frame}

\subsection{Functions}

\begin{frame}[fragile]{Following the Changeset Graph}
  A common task is to follow the graph from a particular changeset:
  \begin{itemize}
  \item \cmd{::Y} or \cmd{ancestors(Y)}: ancestors of changesets in Y
  \item \cmd{X::} or \cmd{descendants(X)}: descendants of changesets
    in X
  \item \cmd{X::Y}: a combination of the above, finding changesets between X and Y
  \end{itemize}

  \pause

  Changes that need to be merged into the default branch:
\begin{lstlisting}
$ hg log -r "ancestors(stable) - ancestors(default)"
$ hg log -r "::stable - ::default"
\end{lstlisting}

\end{frame}

\begin{frame}[fragile]{Family Relations}
  
  \begin{itemize}
  \item \cmd{ancestor(single, single)}: greatest common ancestor of
    the two changesets. Used to find out what needs to be merged in a
    merge between X and Y:
\begin{lstlisting}
$ hg log -r "ancestor(X, Y)::Y"
\end{lstlisting}

  \item \cmd{children(set)}, \cmd{parents([set])}: set of all children/parents of set

  \item \cmd{heads(set)}, \cmd{roots(set)}: changesets from set with
    no children/parents in set

\end{itemize}
\end{frame}

\begin{frame}[fragile]{Parents and Grandparents}
  Going from a changeset to the parent changeset is easy:
  \begin{itemize}
  \item \cmd{p1([set])}, \cmd{p2([set])}: the first/second parent of
    changesets in set or of the working copy if no set is given

  \item \cmd{x\textasciicircum}, \cmd{x\textasciicircum 2}: the
    first/second parent of \cmd x

  \item \cmd{x\textasciitilde n}: the $n$'th first ancestor of \cmd x,
    \cmd{x\textasciitilde 0} is \cmd x, \cmd{x\textasciitilde 3} is
    \cmd{x\textasciicircum\textasciicircum\textasciicircum}
  \end{itemize}

  To see both sides of a merge changeset M use
\begin{lstlisting}
$ hg diff -r "p1(M):M" && hg diff -r "p2(M):M"
\end{lstlisting}
  or the shorter
\begin{lstlisting}
$ hg diff -c M && hg diff -r "M^2:M"
\end{lstlisting}

  %\item \cmd{follow([file])}: follow working copy parents or follow a
  %  file history across renames (like \cmd{hg log -f})
\end{frame}

\begin{frame}[fragile]{The Next Push}
  The \cmd{hg outgoing} command tells what will be pushed, and so does
  this function:
  \begin{itemize}
  \item \cmd{outgoing([path])}: changesets not in the destination
    repository
  \end{itemize}

\pause

  It is now easy to see what you will push as a single diff:
\begin{lstlisting}
$ hg diff -r "outgoing()"
\end{lstlisting}
% \cmd{hg diff} extracts the first/last revision using \cmd{min()} and
% \cmd{max()}

\pause

  It is also easy to reset a repository:
\begin{lstlisting}
$ hg strip "outgoing()"
\end{lstlisting}
  People familiar with Git will know this as
\begin{lstlisting}
$ git reset --hard origin/master
\end{lstlisting}

\end{frame}

\begin{frame}[fragile]{The Next Push (Offline!)}
  With the introduction of \alert{phases} (Mercurial 2.1) you can use:
\begin{lstlisting}
$ hg log -r "not public()"
\end{lstlisting}
  to see unpublished changes without network communication.

  Equivalent to:
\begin{lstlisting}
$ hg log -r "draft() or secret()"
\end{lstlisting}
\end{frame}

%\begin{frame}[fragile]{Handling Missing Revisions}
%  If you don't know if a given revision is present, then use:
%  \begin{itemize}
%  \item \cmd{present(set)}: prevents lookup errors if a revision in
%    set is not found. Used like
%\begin{lstlisting}
%$ hg log -r "head() and (present('bad'):: - present('fix')::)"
%\end{lstlisting}
%    where bad is a known buggy changeset and fix is a bugfix. Without
%    the use of \cmd{present()}, an error would be raised if the bugfix
%    is not yet in the repository.
%  \end{itemize}
%\end{frame}

\begin{frame}[fragile]{Final Touches on Your Query}
  Trimming, cutting, manipulating the set:
  \begin{itemize}[<+->]
    \item \cmd{max(set)}, \cmd{min(set)}: the changeset with
      maximum/minimum revision number in the set

      Commands that need two revisions apply this as needed:
\begin{lstlisting}
$ hg diff -r "not public()"
\end{lstlisting}
behaves like
\begin{lstlisting}
$ hg diff -r "min(not public())" -r "max(not public())"
\end{lstlisting}

    \item \cmd{reverse(set)}: the ``set'' is ordered; this reverses it
    \item \cmd{first(set, n)}, \cmd{last(set, n)}: the first/last
      $n$ changesets
    \item \cmd{sort(set[, [-]key...])}: sorting the set by revision
      number, branch name, changeset message, user name, or date
  \end{itemize}
\end{frame}

\begin{frame}[fragile]{Solving Ambiguities}
  When you do \cmd{hg log -r "foo"}, Mercurial checks
  \begin{enumerate}
  %\item revision number
  %\item full changeset ID
  \item is \cmd{foo} a bookmark?
  \item is \cmd{foo} a tag?
  \item is \cmd{foo} a branch name?
  %\item partial changeset ID
  \end{enumerate}
  First match wins.

  \pause

  You can override this using predicates:
  \begin{itemize}
  %\item \cmd{rev(number)}
  %\item \cmd{id(hash)}
  \item \cmd{bookmark([name])}, \cmd{tag([name])}: the changeset with
    the given bookmark or tag, or all bookmarked/tagged changesets
  \item \cmd{branch(name)}: changesets on the given branch
  \item \cmd{branch(set)}: changesets on the branches of the given set,
    normally used with a single changeset:
\begin{lstlisting}
$ hg log -r "branch(tip)"
\end{lstlisting}
  \end{itemize}
\end{frame}

\subsection{Operators}

\begin{frame}{Operators}
  You can combine two revision sets using:
  \begin{itemize}

  \item \cmd{x and y} or \cmd{x \& y}: changesets in both \cmd x and
    \cmd y

  \item \cmd{x or y} or \cmd{x | y} or \cmd{x + y}: changesets in
    either \cmd x or \cmd y

  \item \cmd{x - y}: changesets in \cmd x but not in \cmd y

  \end{itemize}
\end{frame}

\begin{frame}[fragile]{Examples}
  \begin{itemize}


\item Heads on the current branch:
\begin{lstlisting}
$ hg log -r "head() and branch(.)"
\end{lstlisting}

    Closed heads:
\begin{lstlisting}
$ hg log -r "head() and closed()"
\end{lstlisting}

    Reopened branches:
\begin{lstlisting}
$ hg log -r "closed() and not head()"
\end{lstlisting}

\item Open heads on the current branch:
\begin{lstlisting}
$ hg log -r "head() and branch(.) and not closed()"
\end{lstlisting}

\item Bugfixes that are not in a tagged release:
\begin{lstlisting}
$ hg log -r "keyword(bug) and not ::tagged()"
\end{lstlisting}


  \end{itemize}
\end{frame}


\section{File Sets}

\begin{frame}{Selecting Files}
  File sets let you:
  \begin{itemize}
  \item select files from working copy
  \item select files from old revisions
  \end{itemize}
  Part of Mercurial 1.9
\end{frame}

\subsection{Working Copy Status and Path}

\begin{frame}[fragile]{Working Copy Status}
The predicates are:
\begin{itemize}[<+->]

\item \cmd{modified()}, \cmd{added()}, \cmd{removed()},
  \cmd{deleted()}, \cmd{unknown()}, \cmd{ignored()}, \cmd{clean()}:
  status flags
\begin{lstlisting}
$ hg revert "set:removed()"
\end{lstlisting}

\item \cmd{copied()}: copied files, quite hard to extract today

\item \cmd{hgignore()}: tracked files that \emph{would} be ignored
\begin{lstlisting}
$ hg forget "set:hgignore() and not ignored()"
\end{lstlisting}

\item \cmd{unresolved()}: like \cmd{hg resolve -{}-list} after a merge

\end{itemize}

\end{frame}

\begin{frame}[fragile]{Searching by Path}
  We can replace the \cmd{find} Unix command:
  \begin{itemize}
  \item \cmd{glob(P)} instead of \cmd{find -path P}
  \item \cmd{regex(P)} instead of \cmd{find -regex P}
  \end{itemize}
  Remember that this also works on old revisions:
\begin{lstlisting}
$ hg status -r 1.0::2.0 "set:glob(src/*.h)"
A src/foo.h
M src/bar.h
\end{lstlisting}
This shows that \path{foo.h} is a new header file in version 2.0.
\end{frame}

\begin{frame}{File Type Predicates}
  Other \cmd{find}-like predicates will be:
  \begin{itemize}
    \item \cmd{executable()}, \cmd{symlink()}: file type
    \item \cmd{perm()}, \cmd{owner()}: file permissions
    \item \cmd{date()}, \cmd{size()}: other file metadata
  \end{itemize}
\end{frame}

\subsection{File Content}

\begin{frame}[fragile]{Looking Into Files}
  Matching files by content:
  \begin{itemize}[<+->]
  \item \cmd{grep(regex)}: like the Unix \cmd{grep} we all love
  \item \cmd{contains(string)}: simple sub-string matching
  \item \cmd{binary()}: does file contain a NUL byte?
\begin{lstlisting}
$ hg add "set:unknown() and not binary()"
\end{lstlisting}

  \item \cmd{encoding(enc)}: check if file can be decoded with the given
    character set, such as UTF-8, UTF-16, \dots

    Lets you find mistakes:
\begin{lstlisting}
$ hg locate "set:glob('**.py') and not encoding('UTF-8')"
src/foo.py
\end{lstlisting}

  \end{itemize}
\end{frame}

\begin{frame}{Adding New Predicates}
  The feature will be extensible, some possible future extensions:
  \begin{itemize}
  \item \cmd{eol()}: line-ending type, Unix (LF) or DOS (CRLF)
  \item \cmd{magic()}: recognize files based on file content, like the
    \cmd{file} program in Unix
  \item \cmd{locked()}: files locked for exclusive access by my
    \ext{lock} extension
  \end{itemize}
\end{frame}

\section{Conclusion}

\begin{frame}{Conclusion}
  In short:
  \begin{itemize}
  \item revision sets let you zoom in on the right part of the history
  \item file sets will let you pick out the relevant files
  \item both mechanisms are completely general
  \end{itemize}

  \pause

  Please get in touch if you have more questions:
  \begin{itemize}
  \item Email: \curl{mg@aragost.com}
  \item IRC: \curl{mg} in \curl{\#mercurial} on \curl{irc.freenode.net}
  \end{itemize}

  \pause

  \begin{center}
    \begin{tikzpicture}
      \tikzstyle{every node}=[font=\Huge\bfseries]
      \node[black, shift={(0.8pt, -0.8pt)}] {Thank you!};
      \node[orange!50!red] {Thank you!};
    \end{tikzpicture}
  \end{center}
\end{frame}

%\appendix
%\newcounter{finalframe}
%\setcounter{finalframe}{\value{framenumber}}
%\setcounter{framenumber}{\value{finalframe}}

\end{document}

% LocalWords:  changeset changesets