Mercurial > hg > mercurial-source
view mercurial/urllibcompat.py @ 44087:ffd04bc9f57d
copies: move from a copy on branchpoint to a copy on write approach
Before this changes, any branch points results in a copy of the dictionary containing the
copy information. This can be very costly for branchy history with few rename
information. Instead, we take a "copy on write" approach. Copying the input data
only when we are about to update them.
In practice we where already doing the copying in half of these case (because
`_chain` makes a copy), so we don't add a significant cost here even in the
linear case. However the speed up in branchy case is very significant. Here are
some timing on the pypy repository.
revision: large amount; added files: large amount; rename small amount; c3b14617fbd7 9ba6ab77fd29
before: ! wall 1.399863 comb 1.400000 user 1.370000 sys 0.030000 (median of 10)
after: ! wall 0.766453 comb 0.770000 user 0.750000 sys 0.020000 (median of 11)
revision: large amount; added files: small amount; rename small amount; c3b14617fbd7 f650a9b140d2
before: ! wall 1.876748 comb 1.890000 user 1.870000 sys 0.020000 (median of 10)
after: ! wall 1.167223 comb 1.170000 user 1.150000 sys 0.020000 (median of 10)
revision: large amount; added files: large amount; rename large amount; 08ea3258278e d9fa043f30c0
before: ! wall 0.242457 comb 0.240000 user 0.240000 sys 0.000000 (median of 39)
after: ! wall 0.211476 comb 0.210000 user 0.210000 sys 0.000000 (median of 45)
revision: small amount; added files: large amount; rename large amount; df6f7a526b60 a83dc6a2d56f
before: ! wall 0.013193 comb 0.020000 user 0.020000 sys 0.000000 (median of 224)
after: ! wall 0.013290 comb 0.010000 user 0.010000 sys 0.000000 (median of 222)
revision: small amount; added files: large amount; rename small amount; 4aa4e1f8e19a 169138063d63
before: ! wall 0.001673 comb 0.000000 user 0.000000 sys 0.000000 (median of 1000)
after: ! wall 0.001677 comb 0.000000 user 0.000000 sys 0.000000 (median of 1000)
revision: small amount; added files: small amount; rename small amount; 4bc173b045a6 964879152e2e
before: ! wall 0.000119 comb 0.000000 user 0.000000 sys 0.000000 (median of 8023)
after: ! wall 0.000119 comb 0.000000 user 0.000000 sys 0.000000 (median of 7997)
revision: medium amount; added files: large amount; rename medium amount; c95f1ced15f2 2c68e87c3efe
before: ! wall 0.201898 comb 0.210000 user 0.200000 sys 0.010000 (median of 48)
after: ! wall 0.167415 comb 0.170000 user 0.160000 sys 0.010000 (median of 58)
revision: medium amount; added files: medium amount; rename small amount; d343da0c55a8 d7746d32bf9d
before: ! wall 0.036820 comb 0.040000 user 0.040000 sys 0.000000 (median of 100)
after: ! wall 0.035797 comb 0.040000 user 0.040000 sys 0.000000 (median of 100)
The extra cost in the linear case can be reclaimed later with some extra logic.
Differential Revision: https://phab.mercurial-scm.org/D7124
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 15 Oct 2019 18:23:34 +0200 |
parents | c59eb1560c44 |
children |
line wrap: on
line source
# urllibcompat.py - adapters to ease using urllib2 on Py2 and urllib on Py3 # # Copyright 2017 Google, Inc. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import from .pycompat import getattr from . import pycompat _sysstr = pycompat.sysstr class _pycompatstub(object): def __init__(self): self._aliases = {} def _registeraliases(self, origin, items): """Add items that will be populated at the first access""" items = map(_sysstr, items) self._aliases.update( (item.replace(r'_', r'').lower(), (origin, item)) for item in items ) def _registeralias(self, origin, attr, name): """Alias ``origin``.``attr`` as ``name``""" self._aliases[_sysstr(name)] = (origin, _sysstr(attr)) def __getattr__(self, name): try: origin, item = self._aliases[name] except KeyError: raise AttributeError(name) self.__dict__[name] = obj = getattr(origin, item) return obj httpserver = _pycompatstub() urlreq = _pycompatstub() urlerr = _pycompatstub() if pycompat.ispy3: import urllib.parse urlreq._registeraliases( urllib.parse, ( b"splitattr", b"splitpasswd", b"splitport", b"splituser", b"urlparse", b"urlunparse", ), ) urlreq._registeralias(urllib.parse, b"parse_qs", b"parseqs") urlreq._registeralias(urllib.parse, b"parse_qsl", b"parseqsl") urlreq._registeralias(urllib.parse, b"unquote_to_bytes", b"unquote") import urllib.request urlreq._registeraliases( urllib.request, ( b"AbstractHTTPHandler", b"BaseHandler", b"build_opener", b"FileHandler", b"FTPHandler", b"ftpwrapper", b"HTTPHandler", b"HTTPSHandler", b"install_opener", b"pathname2url", b"HTTPBasicAuthHandler", b"HTTPDigestAuthHandler", b"HTTPPasswordMgrWithDefaultRealm", b"ProxyHandler", b"Request", b"url2pathname", b"urlopen", ), ) import urllib.response urlreq._registeraliases(urllib.response, (b"addclosehook", b"addinfourl",)) import urllib.error urlerr._registeraliases(urllib.error, (b"HTTPError", b"URLError",)) import http.server httpserver._registeraliases( http.server, ( b"HTTPServer", b"BaseHTTPRequestHandler", b"SimpleHTTPRequestHandler", b"CGIHTTPRequestHandler", ), ) # urllib.parse.quote() accepts both str and bytes, decodes bytes # (if necessary), and returns str. This is wonky. We provide a custom # implementation that only accepts bytes and emits bytes. def quote(s, safe=r'/'): # bytestr has an __iter__ that emits characters. quote_from_bytes() # does an iteration and expects ints. We coerce to bytes to appease it. if isinstance(s, pycompat.bytestr): s = bytes(s) s = urllib.parse.quote_from_bytes(s, safe=safe) return s.encode('ascii', 'strict') # urllib.parse.urlencode() returns str. We use this function to make # sure we return bytes. def urlencode(query, doseq=False): s = urllib.parse.urlencode(query, doseq=doseq) return s.encode('ascii') urlreq.quote = quote urlreq.urlencode = urlencode def getfullurl(req): return req.full_url def gethost(req): return req.host def getselector(req): return req.selector def getdata(req): return req.data def hasdata(req): return req.data is not None else: import BaseHTTPServer import CGIHTTPServer import SimpleHTTPServer import urllib2 import urllib import urlparse urlreq._registeraliases( urllib, ( b"addclosehook", b"addinfourl", b"ftpwrapper", b"pathname2url", b"quote", b"splitattr", b"splitpasswd", b"splitport", b"splituser", b"unquote", b"url2pathname", b"urlencode", ), ) urlreq._registeraliases( urllib2, ( b"AbstractHTTPHandler", b"BaseHandler", b"build_opener", b"FileHandler", b"FTPHandler", b"HTTPBasicAuthHandler", b"HTTPDigestAuthHandler", b"HTTPHandler", b"HTTPPasswordMgrWithDefaultRealm", b"HTTPSHandler", b"install_opener", b"ProxyHandler", b"Request", b"urlopen", ), ) urlreq._registeraliases(urlparse, (b"urlparse", b"urlunparse",)) urlreq._registeralias(urlparse, b"parse_qs", b"parseqs") urlreq._registeralias(urlparse, b"parse_qsl", b"parseqsl") urlerr._registeraliases(urllib2, (b"HTTPError", b"URLError",)) httpserver._registeraliases( BaseHTTPServer, (b"HTTPServer", b"BaseHTTPRequestHandler",) ) httpserver._registeraliases( SimpleHTTPServer, (b"SimpleHTTPRequestHandler",) ) httpserver._registeraliases(CGIHTTPServer, (b"CGIHTTPRequestHandler",)) def gethost(req): return req.get_host() def getselector(req): return req.get_selector() def getfullurl(req): return req.get_full_url() def getdata(req): return req.get_data() def hasdata(req): return req.has_data()