# HG changeset patch # User Durham Goode <durham@fb.com> # Date 1435115830 25200 # Node ID efcefc3522bd92e14362ad661f49cc3037919269 # Parent e4006703a2877b44d66b6cc784adb6c6c0db7181 pull: consider remotes during discovery The default dulwich graph walker only walks from refs/heads. During the discovery phase of fetching this causes it to redownload commits that are only referenced by refs/remotes. In a normal hggit case, this seems to mean it redownloads the entire git repo on every hg pull. Added a --debug to a test to check the object count (it decreased from 21 to 10 as part of this patch). diff --git a/hggit/git_handler.py b/hggit/git_handler.py --- a/hggit/git_handler.py +++ b/hggit/git_handler.py @@ -1075,7 +1075,15 @@ def fetch_pack(self, remote_name, heads=None): client, path = self.get_transport_and_path(remote_name) - graphwalker = self.git.get_graph_walker() + + # The dulwich default walk only checks refs/heads/. We also want to + # consider remotes when doing discovery, so we build our own list. We + # can't just do 'refs/' here because the tag class doesn't have a + # parents function for walking, and older versions of dulwich don't like + # that. + haveheads = self.git.refs.as_dict('refs/remotes/').values() + haveheads.extend(self.git.refs.as_dict('refs/heads/').values()) + graphwalker = self.git.get_graph_walker(heads=haveheads) def determine_wants(refs): filteredrefs = self.filter_refs(refs, heads)