# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1435115830 25200
# Node ID efcefc3522bd92e14362ad661f49cc3037919269
# Parent  e4006703a2877b44d66b6cc784adb6c6c0db7181
pull: consider remotes during discovery

The default dulwich graph walker only walks from refs/heads. During the
discovery phase of a fetch, this causes it to redownload commits that are only
referenced by refs/remotes. In a normal hggit setup, this seems to mean that it
redownloads the entire git repo on every hg pull.

Added the --debug flag to a test to check the object count (it decreased from
21 to 10 as part of this patch).

diff --git a/hggit/git_handler.py b/hggit/git_handler.py
--- a/hggit/git_handler.py
+++ b/hggit/git_handler.py
@@ -1075,7 +1075,15 @@
 
     def fetch_pack(self, remote_name, heads=None):
         client, path = self.get_transport_and_path(remote_name)
-        graphwalker = self.git.get_graph_walker()
+
+        # The dulwich default walk only checks refs/heads/. We also want to
+        # consider remotes when doing discovery, so we build our own list.  We
+        # can't just do 'refs/' here because the tag class doesn't have a
+        # parents function for walking, and older versions of dulwich don't like
+        # that.
+        haveheads = self.git.refs.as_dict('refs/remotes/').values()
+        haveheads.extend(self.git.refs.as_dict('refs/heads/').values())
+        graphwalker = self.git.get_graph_walker(heads=haveheads)
 
         def determine_wants(refs):
             filteredrefs = self.filter_refs(refs, heads)