Mercurial > hg > mercurial-source
comparison mercurial/url.py @ 14076:924c82157d46
url: move URL parsing functions into util to improve startup time
The introduction of the new URL parsing code has created a startup
time regression. This is mainly due to the use of url.hasscheme() in
the ui class. It ends up importing many libraries that the url module
requires.
This fix helps marginally, but if we can get rid of the urllib import
in the URL parser altogether, startup time will go back to normal.
perfstartup time before the URL refactoring (8796fb6af67e):
! wall 0.050692 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
current startup time (139fb11210bb):
! wall 0.070685 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
after this change:
! wall 0.064667 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
author | Brodie Rao <brodie@bitheap.org> |
---|---|
date | Sat, 30 Apr 2011 09:43:20 -0700 |
parents | b23a8dd36a21 |
children | 5fa21960b2f4 |
comparison
equal
deleted
inserted
replaced
14075:bc101902a68d | 14076:924c82157d46 |
---|---|
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> |
6 # | 6 # |
7 # This software may be used and distributed according to the terms of the | 7 # This software may be used and distributed according to the terms of the |
8 # GNU General Public License version 2 or any later version. | 8 # GNU General Public License version 2 or any later version. |
9 | 9 |
10 import urllib, urllib2, httplib, os, socket, cStringIO, re | 10 import urllib, urllib2, httplib, os, socket, cStringIO |
11 import __builtin__ | 11 import __builtin__ |
12 from i18n import _ | 12 from i18n import _ |
13 import keepalive, util | 13 import keepalive, util |
14 | |
15 class url(object): | |
16 """Reliable URL parser. | |
17 | |
18 This parses URLs and provides attributes for the following | |
19 components: | |
20 | |
21 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment> | |
22 | |
23 Missing components are set to None. The only exception is | |
24 fragment, which is set to '' if present but empty. | |
25 | |
26 If parsefragment is False, fragment is included in query. If | |
27 parsequery is False, query is included in path. If both are | |
28 False, both fragment and query are included in path. | |
29 | |
30 See http://www.ietf.org/rfc/rfc2396.txt for more information. | |
31 | |
32 Note that for backward compatibility reasons, bundle URLs do not | |
33 take host names. That means 'bundle://../' has a path of '../'. | |
34 | |
35 Examples: | |
36 | |
37 >>> url('http://www.ietf.org/rfc/rfc2396.txt') | |
38 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'> | |
39 >>> url('ssh://[::1]:2200//home/joe/repo') | |
40 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'> | |
41 >>> url('file:///home/joe/repo') | |
42 <url scheme: 'file', path: '/home/joe/repo'> | |
43 >>> url('bundle:foo') | |
44 <url scheme: 'bundle', path: 'foo'> | |
45 >>> url('bundle://../foo') | |
46 <url scheme: 'bundle', path: '../foo'> | |
47 >>> url('c:\\\\foo\\\\bar') | |
48 <url path: 'c:\\\\foo\\\\bar'> | |
49 | |
50 Authentication credentials: | |
51 | |
52 >>> url('ssh://joe:xyz@x/repo') | |
53 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'> | |
54 >>> url('ssh://joe@x/repo') | |
55 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'> | |
56 | |
57 Query strings and fragments: | |
58 | |
59 >>> url('http://host/a?b#c') | |
60 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'> | |
61 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False) | |
62 <url scheme: 'http', host: 'host', path: 'a?b#c'> | |
63 """ | |
64 | |
65 _safechars = "!~*'()+" | |
66 _safepchars = "/!~*'()+" | |
67 _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match | |
68 | |
69 def __init__(self, path, parsequery=True, parsefragment=True): | |
70 # We slowly chomp away at path until we have only the path left | |
71 self.scheme = self.user = self.passwd = self.host = None | |
72 self.port = self.path = self.query = self.fragment = None | |
73 self._localpath = True | |
74 self._hostport = '' | |
75 self._origpath = path | |
76 | |
77 # special case for Windows drive letters | |
78 if hasdriveletter(path): | |
79 self.path = path | |
80 return | |
81 | |
82 # For compatibility reasons, we can't handle bundle paths as | |
83 # normal URLS | |
84 if path.startswith('bundle:'): | |
85 self.scheme = 'bundle' | |
86 path = path[7:] | |
87 if path.startswith('//'): | |
88 path = path[2:] | |
89 self.path = path | |
90 return | |
91 | |
92 if self._matchscheme(path): | |
93 parts = path.split(':', 1) | |
94 if parts[0]: | |
95 self.scheme, path = parts | |
96 self._localpath = False | |
97 | |
98 if not path: | |
99 path = None | |
100 if self._localpath: | |
101 self.path = '' | |
102 return | |
103 else: | |
104 if parsefragment and '#' in path: | |
105 path, self.fragment = path.split('#', 1) | |
106 if not path: | |
107 path = None | |
108 if self._localpath: | |
109 self.path = path | |
110 return | |
111 | |
112 if parsequery and '?' in path: | |
113 path, self.query = path.split('?', 1) | |
114 if not path: | |
115 path = None | |
116 if not self.query: | |
117 self.query = None | |
118 | |
119 # // is required to specify a host/authority | |
120 if path and path.startswith('//'): | |
121 parts = path[2:].split('/', 1) | |
122 if len(parts) > 1: | |
123 self.host, path = parts | |
124 path = path | |
125 else: | |
126 self.host = parts[0] | |
127 path = None | |
128 if not self.host: | |
129 self.host = None | |
130 if path: | |
131 path = '/' + path | |
132 | |
133 if self.host and '@' in self.host: | |
134 self.user, self.host = self.host.rsplit('@', 1) | |
135 if ':' in self.user: | |
136 self.user, self.passwd = self.user.split(':', 1) | |
137 if not self.host: | |
138 self.host = None | |
139 | |
140 # Don't split on colons in IPv6 addresses without ports | |
141 if (self.host and ':' in self.host and | |
142 not (self.host.startswith('[') and self.host.endswith(']'))): | |
143 self._hostport = self.host | |
144 self.host, self.port = self.host.rsplit(':', 1) | |
145 if not self.host: | |
146 self.host = None | |
147 | |
148 if (self.host and self.scheme == 'file' and | |
149 self.host not in ('localhost', '127.0.0.1', '[::1]')): | |
150 raise util.Abort(_('file:// URLs can only refer to localhost')) | |
151 | |
152 self.path = path | |
153 | |
154 for a in ('user', 'passwd', 'host', 'port', | |
155 'path', 'query', 'fragment'): | |
156 v = getattr(self, a) | |
157 if v is not None: | |
158 setattr(self, a, urllib.unquote(v)) | |
159 | |
160 def __repr__(self): | |
161 attrs = [] | |
162 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path', | |
163 'query', 'fragment'): | |
164 v = getattr(self, a) | |
165 if v is not None: | |
166 attrs.append('%s: %r' % (a, v)) | |
167 return '<url %s>' % ', '.join(attrs) | |
168 | |
169 def __str__(self): | |
170 """Join the URL's components back into a URL string. | |
171 | |
172 Examples: | |
173 | |
174 >>> str(url('http://user:pw@host:80/?foo#bar')) | |
175 'http://user:pw@host:80/?foo#bar' | |
176 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#')) | |
177 'ssh://user:pw@[::1]:2200//home/joe#' | |
178 >>> str(url('http://localhost:80//')) | |
179 'http://localhost:80//' | |
180 >>> str(url('http://localhost:80/')) | |
181 'http://localhost:80/' | |
182 >>> str(url('http://localhost:80')) | |
183 'http://localhost:80/' | |
184 >>> str(url('bundle:foo')) | |
185 'bundle:foo' | |
186 >>> str(url('bundle://../foo')) | |
187 'bundle:../foo' | |
188 >>> str(url('path')) | |
189 'path' | |
190 """ | |
191 if self._localpath: | |
192 s = self.path | |
193 if self.scheme == 'bundle': | |
194 s = 'bundle:' + s | |
195 if self.fragment: | |
196 s += '#' + self.fragment | |
197 return s | |
198 | |
199 s = self.scheme + ':' | |
200 if (self.user or self.passwd or self.host or | |
201 self.scheme and not self.path): | |
202 s += '//' | |
203 if self.user: | |
204 s += urllib.quote(self.user, safe=self._safechars) | |
205 if self.passwd: | |
206 s += ':' + urllib.quote(self.passwd, safe=self._safechars) | |
207 if self.user or self.passwd: | |
208 s += '@' | |
209 if self.host: | |
210 if not (self.host.startswith('[') and self.host.endswith(']')): | |
211 s += urllib.quote(self.host) | |
212 else: | |
213 s += self.host | |
214 if self.port: | |
215 s += ':' + urllib.quote(self.port) | |
216 if self.host: | |
217 s += '/' | |
218 if self.path: | |
219 s += urllib.quote(self.path, safe=self._safepchars) | |
220 if self.query: | |
221 s += '?' + urllib.quote(self.query, safe=self._safepchars) | |
222 if self.fragment is not None: | |
223 s += '#' + urllib.quote(self.fragment, safe=self._safepchars) | |
224 return s | |
225 | |
226 def authinfo(self): | |
227 user, passwd = self.user, self.passwd | |
228 try: | |
229 self.user, self.passwd = None, None | |
230 s = str(self) | |
231 finally: | |
232 self.user, self.passwd = user, passwd | |
233 if not self.user: | |
234 return (s, None) | |
235 return (s, (None, (str(self), self.host), | |
236 self.user, self.passwd or '')) | |
237 | |
238 def localpath(self): | |
239 if self.scheme == 'file' or self.scheme == 'bundle': | |
240 path = self.path or '/' | |
241 # For Windows, we need to promote hosts containing drive | |
242 # letters to paths with drive letters. | |
243 if hasdriveletter(self._hostport): | |
244 path = self._hostport + '/' + self.path | |
245 elif self.host is not None and self.path: | |
246 path = '/' + path | |
247 # We also need to handle the case of file:///C:/, which | |
248 # should return C:/, not /C:/. | |
249 elif hasdriveletter(path): | |
250 # Strip leading slash from paths with drive names | |
251 return path[1:] | |
252 return path | |
253 return self._origpath | |
254 | |
255 def hasscheme(path): | |
256 return bool(url(path).scheme) | |
257 | |
258 def hasdriveletter(path): | |
259 return path[1:2] == ':' and path[0:1].isalpha() | |
260 | |
261 def localpath(path): | |
262 return url(path, parsequery=False, parsefragment=False).localpath() | |
263 | |
264 def hidepassword(u): | |
265 '''hide user credential in a url string''' | |
266 u = url(u) | |
267 if u.passwd: | |
268 u.passwd = '***' | |
269 return str(u) | |
270 | |
271 def removeauth(u): | |
272 '''remove all authentication information from a url string''' | |
273 u = url(u) | |
274 u.user = u.passwd = None | |
275 return str(u) | |
276 | 14 |
277 def readauthforuri(ui, uri): | 15 def readauthforuri(ui, uri): |
278 # Read configuration | 16 # Read configuration |
279 config = dict() | 17 config = dict() |
280 for key, val in ui.configitems('auth'): | 18 for key, val in ui.configitems('auth'): |
355 if proxyurl: | 93 if proxyurl: |
356 # proxy can be proper url or host[:port] | 94 # proxy can be proper url or host[:port] |
357 if not (proxyurl.startswith('http:') or | 95 if not (proxyurl.startswith('http:') or |
358 proxyurl.startswith('https:')): | 96 proxyurl.startswith('https:')): |
359 proxyurl = 'http://' + proxyurl + '/' | 97 proxyurl = 'http://' + proxyurl + '/' |
360 proxy = url(proxyurl) | 98 proxy = util.url(proxyurl) |
361 if not proxy.user: | 99 if not proxy.user: |
362 proxy.user = ui.config("http_proxy", "user") | 100 proxy.user = ui.config("http_proxy", "user") |
363 proxy.passwd = ui.config("http_proxy", "passwd") | 101 proxy.passwd = ui.config("http_proxy", "passwd") |
364 | 102 |
365 # see if we should use a proxy for this url | 103 # see if we should use a proxy for this url |
543 else: | 281 else: |
544 tunnel_host = req.get_selector() | 282 tunnel_host = req.get_selector() |
545 new_tunnel = False | 283 new_tunnel = False |
546 | 284 |
547 if new_tunnel or tunnel_host == req.get_full_url(): # has proxy | 285 if new_tunnel or tunnel_host == req.get_full_url(): # has proxy |
548 u = url(tunnel_host) | 286 u = util.url(tunnel_host) |
549 if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS | 287 if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS |
550 h.realhostport = ':'.join([u.host, (u.port or '443')]) | 288 h.realhostport = ':'.join([u.host, (u.port or '443')]) |
551 h.headers = req.headers.copy() | 289 h.headers = req.headers.copy() |
552 h.headers.update(handler.parent.addheaders) | 290 h.headers.update(handler.parent.addheaders) |
553 return | 291 return |
874 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')] | 612 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')] |
875 opener.addheaders.append(('Accept', 'application/mercurial-0.1')) | 613 opener.addheaders.append(('Accept', 'application/mercurial-0.1')) |
876 return opener | 614 return opener |
877 | 615 |
878 def open(ui, url_, data=None): | 616 def open(ui, url_, data=None): |
879 u = url(url_) | 617 u = util.url(url_) |
880 if u.scheme: | 618 if u.scheme: |
881 u.scheme = u.scheme.lower() | 619 u.scheme = u.scheme.lower() |
882 url_, authinfo = u.authinfo() | 620 url_, authinfo = u.authinfo() |
883 else: | 621 else: |
884 path = util.normpath(os.path.abspath(url_)) | 622 path = util.normpath(os.path.abspath(url_)) |