comparison mercurial/url.py @ 14076:924c82157d46

url: move URL parsing functions into util to improve startup time. The introduction of the new URL parsing code has created a startup time regression. This is mainly due to the use of url.hasscheme() in the ui class. It ends up importing many libraries that the url module requires. This fix helps marginally, but if we can get rid of the urllib import in the URL parser altogether, startup time will go back to normal. perfstartup time before the URL refactoring (8796fb6af67e): ! wall 0.050692 comb 0.000000 user 0.000000 sys 0.000000 (best of 100) current startup time (139fb11210bb): ! wall 0.070685 comb 0.000000 user 0.000000 sys 0.000000 (best of 100) after this change: ! wall 0.064667 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
author Brodie Rao <brodie@bitheap.org>
date Sat, 30 Apr 2011 09:43:20 -0700
parents b23a8dd36a21
children 5fa21960b2f4
comparison
equal deleted inserted replaced
14075:bc101902a68d 14076:924c82157d46
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 # 6 #
7 # This software may be used and distributed according to the terms of the 7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version. 8 # GNU General Public License version 2 or any later version.
9 9
10 import urllib, urllib2, httplib, os, socket, cStringIO, re 10 import urllib, urllib2, httplib, os, socket, cStringIO
11 import __builtin__ 11 import __builtin__
12 from i18n import _ 12 from i18n import _
13 import keepalive, util 13 import keepalive, util
14
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url('c:\\\\foo\\\\bar')
    <url path: 'c:\\\\foo\\\\bar'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # Characters left unescaped when quoting user/passwd, and when
    # quoting path/query/fragment (the latter also keeps '/').
    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+"
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Split path into scheme, user, passwd, host, port, path,
        query and fragment attributes.

        parsequery and parsefragment control whether '?...' and '#...'
        are split off or left as part of the path (see the class
        docstring).

        Raises util.Abort for a file:// URL whose host is not
        localhost, 127.0.0.1 or [::1].
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        # special case for Windows drive letters
        if hasdriveletter(path):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parsefragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            # Local paths keep everything else (including any '?')
            # verbatim and are not unquoted.
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # An empty authority (scheme:///x) means the path
                    # is absolute, so restore its leading slash.
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise util.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # Components are stored unquoted; __str__ quotes them again.
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the components that
        are not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # path is None for input such as '#frag'; fall back to ''
            # so the concatenation below (and str() itself) cannot
            # fail with a TypeError.
            s = self.path or ''
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed (IPv6) hosts are emitted verbatim so that the
            # brackets and colons are not percent-encoded.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (urlstring, authdata) for this URL.

        urlstring is str(self) rendered with user and passwd
        temporarily cleared. authdata is None when there is no user;
        otherwise it is the tuple
        (None, (str(self), self.host), self.user, self.passwd or ''),
        where str(self) is rendered with the credentials restored.
        """
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            # Always restore the credentials, even if str() raised.
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        return (s, (None, (str(self), self.host),
                    self.user, self.passwd or ''))

    def localpath(self):
        """Return the filesystem path this URL refers to.

        For file: and bundle: URLs the path is derived from the parsed
        components; for anything else the original, unparsed string
        given to the constructor is returned.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            # We also need to handle the case of file:///C:/, which
            # should return C:/, not /C:/. The drive letter follows
            # the leading slash, so test the remainder of the path; a
            # path that already starts with a drive letter is returned
            # unchanged.
            elif path.startswith('/') and hasdriveletter(path[1:]):
                # Strip leading slash from paths with drive names
                return path[1:]
            return path
        return self._origpath
254
def hasscheme(path):
    '''report whether path parses as a URL with a non-empty scheme'''
    parsed = url(path)
    return bool(parsed.scheme)
257
def hasdriveletter(path):
    '''report whether path starts with a drive letter, i.e. an
    alphabetic character immediately followed by a colon'''
    # Slices (not indexing) so that short or empty strings yield ''
    # instead of raising IndexError.
    first, second = path[0:1], path[1:2]
    return second == ':' and first.isalpha()
260
def localpath(path):
    '''return the local path for a path or URL string

    The string is parsed with query and fragment splitting disabled,
    so any '?' and '#' stay part of the path.'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
263
def hidepassword(u):
    '''return the url string with any password replaced by ***'''
    parsed = url(u)
    # Only mask a password that is actually present; URLs without one
    # are re-rendered without adding a password.
    if parsed.passwd:
        parsed.passwd = '***'
    return str(parsed)
270
def removeauth(u):
    '''return the url string with user name and password stripped'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
276 14
277 def readauthforuri(ui, uri): 15 def readauthforuri(ui, uri):
278 # Read configuration 16 # Read configuration
279 config = dict() 17 config = dict()
280 for key, val in ui.configitems('auth'): 18 for key, val in ui.configitems('auth'):
355 if proxyurl: 93 if proxyurl:
356 # proxy can be proper url or host[:port] 94 # proxy can be proper url or host[:port]
357 if not (proxyurl.startswith('http:') or 95 if not (proxyurl.startswith('http:') or
358 proxyurl.startswith('https:')): 96 proxyurl.startswith('https:')):
359 proxyurl = 'http://' + proxyurl + '/' 97 proxyurl = 'http://' + proxyurl + '/'
360 proxy = url(proxyurl) 98 proxy = util.url(proxyurl)
361 if not proxy.user: 99 if not proxy.user:
362 proxy.user = ui.config("http_proxy", "user") 100 proxy.user = ui.config("http_proxy", "user")
363 proxy.passwd = ui.config("http_proxy", "passwd") 101 proxy.passwd = ui.config("http_proxy", "passwd")
364 102
365 # see if we should use a proxy for this url 103 # see if we should use a proxy for this url
543 else: 281 else:
544 tunnel_host = req.get_selector() 282 tunnel_host = req.get_selector()
545 new_tunnel = False 283 new_tunnel = False
546 284
547 if new_tunnel or tunnel_host == req.get_full_url(): # has proxy 285 if new_tunnel or tunnel_host == req.get_full_url(): # has proxy
548 u = url(tunnel_host) 286 u = util.url(tunnel_host)
549 if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS 287 if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS
550 h.realhostport = ':'.join([u.host, (u.port or '443')]) 288 h.realhostport = ':'.join([u.host, (u.port or '443')])
551 h.headers = req.headers.copy() 289 h.headers = req.headers.copy()
552 h.headers.update(handler.parent.addheaders) 290 h.headers.update(handler.parent.addheaders)
553 return 291 return
874 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')] 612 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
875 opener.addheaders.append(('Accept', 'application/mercurial-0.1')) 613 opener.addheaders.append(('Accept', 'application/mercurial-0.1'))
876 return opener 614 return opener
877 615
878 def open(ui, url_, data=None): 616 def open(ui, url_, data=None):
879 u = url(url_) 617 u = util.url(url_)
880 if u.scheme: 618 if u.scheme:
881 u.scheme = u.scheme.lower() 619 u.scheme = u.scheme.lower()
882 url_, authinfo = u.authinfo() 620 url_, authinfo = u.authinfo()
883 else: 621 else:
884 path = util.normpath(os.path.abspath(url_)) 622 path = util.normpath(os.path.abspath(url_))