Mercurial > hg > mercurial-source
comparison mercurial/parsers.c @ 7108:1ca878d7b849
C implementation of revlog index parsing
author | Bernhard Leiner <bleiner@gmail.com> |
---|---|
date | Fri, 17 Oct 2008 01:03:38 +0200 |
parents | 16bafcebd3d1 |
children | 3cf699e89e48 42db22108d85 |
comparison
equal
deleted
inserted
replaced
7107:125c8fedcbe0 | 7108:1ca878d7b849 |
---|---|
232 Py_XDECREF(entry); | 232 Py_XDECREF(entry); |
233 Py_XDECREF(parents); | 233 Py_XDECREF(parents); |
234 return ret; | 234 return ret; |
235 } | 235 } |
236 | 236 |
237 | |
/* Convert a 64 bit value from network (big endian) byte order to host
 * byte order.
 *
 * x holds the eight bytes exactly as they were read from the big endian
 * source.  The bytes are decoded one by one, most significant first, so
 * the result is correct on little endian and big endian hosts alike.
 * (Combining two ntohl() calls and swapping the halves is only correct
 * on little endian hosts: where ntohl() is the identity the halves end
 * up exchanged.)
 */
static inline uint64_t ntohll(uint64_t x)
{
	const unsigned char *b = (const unsigned char *)&x;

	return ((uint64_t)b[0] << 56) | ((uint64_t)b[1] << 48) |
	       ((uint64_t)b[2] << 40) | ((uint64_t)b[3] << 32) |
	       ((uint64_t)b[4] << 24) | ((uint64_t)b[5] << 16) |
	       ((uint64_t)b[6] << 8) | (uint64_t)b[7];
}
243 | |
/* Revision number paired with the null node id in the nodemap. */
const int nullrev = -1;

/* The null node id: 20 bytes, all zero. */
const char nullid[20] = { 0 };
246 | |
247 /* RevlogNG format (all in big endian, data may be inlined): | |
248 * 6 bytes: offset | |
249 * 2 bytes: flags | |
250 * 4 bytes: compressed length | |
251 * 4 bytes: uncompressed length | |
252 * 4 bytes: base revision | |
253 * 4 bytes: link revision | |
254 * 4 bytes: parent 1 revision | |
255 * 4 bytes: parent 2 revision | |
256 * 32 bytes: nodeid (only 20 bytes used) | |
257 */ | |
258 static int _parse_index_ng (const char *data, int size, int inlined, | |
259 PyObject *index, PyObject *nodemap) | |
260 { | |
261 PyObject *entry = NULL, *node_id = NULL, *n_obj = NULL; | |
262 PyObject *nullrev_obj = NULL, *nullid_obj = NULL; | |
263 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; | |
264 uint64_t offset_flags; | |
265 int n = 0; | |
266 const char *end = data + size; | |
267 | |
268 while (data < end) { | |
269 offset_flags = ntohll(*((uint64_t *) data)); | |
270 if (n == 0) /* mask out version number for the first entry */ | |
271 offset_flags &= 0xFFFF; | |
272 | |
273 comp_len = ntohl(*((uint32_t *) (data + 8))); | |
274 uncomp_len = ntohl(*((uint32_t *) (data + 12))); | |
275 base_rev = ntohl(*((uint32_t *) (data + 16))); | |
276 link_rev = ntohl(*((uint32_t *) (data + 20))); | |
277 parent_1 = ntohl(*((uint32_t *) (data + 24))); | |
278 parent_2 = ntohl(*((uint32_t *) (data + 28))); | |
279 node_id = PyString_FromStringAndSize(data + 32, 20); | |
280 n_obj = PyInt_FromLong(n); | |
281 if (!node_id || !n_obj || | |
282 PyDict_SetItem(nodemap, node_id, n_obj) != 0) | |
283 goto quit; | |
284 Py_DECREF(n_obj); | |
285 | |
286 entry = Py_BuildValue("LiiiiiiN", offset_flags, comp_len, | |
287 uncomp_len, base_rev, link_rev, | |
288 parent_1, parent_2, node_id); | |
289 PyObject_GC_UnTrack(entry); /* don't waste time with this */ | |
290 if (!entry) | |
291 goto quit; | |
292 | |
293 /* append to or set value in the index list */ | |
294 if (inlined) { | |
295 if (PyList_Append(index, entry) != 0) | |
296 goto quit; | |
297 Py_DECREF(entry); | |
298 } else { | |
299 PyList_SET_ITEM(index, n, entry); /* steals reference */ | |
300 } | |
301 | |
302 data += 64 + (inlined ? comp_len : 0); | |
303 n++; | |
304 } | |
305 if (data > end) { | |
306 if (!PyErr_Occurred()) | |
307 PyErr_SetString(PyExc_ValueError, "corrupt index file"); | |
308 goto quit; | |
309 } | |
310 | |
311 /* create the nullid/nullrev entry in the nodemap and the | |
312 * magic nullid entry in the index at [-1] */ | |
313 nullid_obj = PyString_FromStringAndSize(nullid, 20); | |
314 nullrev_obj = PyInt_FromLong(nullrev); | |
315 if (!nodemap || !nullid_obj || !nullrev_obj || | |
316 PyDict_SetItem(nodemap, nullid_obj, nullrev_obj) != 0) | |
317 goto quit; | |
318 Py_DECREF(nullrev_obj); | |
319 | |
320 entry = Py_BuildValue("iiiiiiiN", 0, 0, 0, -1, -1, -1, -1, nullid_obj); | |
321 PyObject_GC_UnTrack(entry); /* don't waste time with this */ | |
322 if (!entry) | |
323 goto quit; | |
324 if (inlined) { | |
325 if (PyList_Append(index, entry) != 0) | |
326 goto quit; | |
327 Py_DECREF(entry); | |
328 } else { | |
329 PyList_SET_ITEM(index, n, entry); /* steals reference */ | |
330 } | |
331 | |
332 return 1; | |
333 | |
334 quit: | |
335 Py_XDECREF(n_obj); | |
336 Py_XDECREF(node_id); | |
337 Py_XDECREF(entry); | |
338 Py_XDECREF(nullrev_obj); | |
339 Py_XDECREF(nullid_obj); | |
340 return 0; | |
341 } | |
342 | |
343 | |
344 | |
345 /* This function parses a index file and returns a Python tuple of the | |
346 * following format: (index, nodemap, cache) | |
347 * | |
348 * index: a list of tuples containing the RevlogNG records | |
349 * nodemap: a dict mapping node ids to indices in the index list | |
350 * cache: if data is inlined, a tuple (index_file_content, 0) else None | |
351 */ | |
352 static PyObject *parse_index(PyObject *self, PyObject *args) | |
353 { | |
354 const char *data; | |
355 int size, inlined; | |
356 PyObject *rval = NULL, *index = NULL, *nodemap = NULL, *cache = NULL; | |
357 PyObject *data_obj = NULL, *inlined_obj; | |
358 | |
359 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj)) | |
360 return NULL; | |
361 inlined = inlined_obj && PyObject_IsTrue(inlined_obj); | |
362 | |
363 /* If no data is inlined, we know the size of the index list in | |
364 * advance: size divided by size of one one revlog record (64 bytes) | |
365 * plus one for the nullid */ | |
366 index = inlined ? PyList_New(0) : PyList_New(size / 64 + 1); | |
367 if (!index) | |
368 goto quit; | |
369 | |
370 nodemap = PyDict_New(); | |
371 | |
372 /* set up the cache return value */ | |
373 if (inlined) { | |
374 /* Note that the reference to data_obj is only borrowed */ | |
375 data_obj = PyTuple_GET_ITEM(args, 0); | |
376 cache = Py_BuildValue("iO", 0, data_obj); | |
377 if (!cache) | |
378 goto quit; | |
379 } else { | |
380 cache = Py_None; | |
381 Py_INCREF(Py_None); | |
382 } | |
383 | |
384 /* actually populate the index and the nodemap with data */ | |
385 if (!_parse_index_ng (data, size, inlined, index, nodemap)) | |
386 goto quit; | |
387 | |
388 rval = Py_BuildValue("NNN", index, nodemap, cache); | |
389 if (!rval) | |
390 goto quit; | |
391 return rval; | |
392 | |
393 quit: | |
394 Py_XDECREF(index); | |
395 Py_XDECREF(nodemap); | |
396 Py_XDECREF(cache); | |
397 Py_XDECREF(rval); | |
398 Py_XDECREF(data_obj); | |
399 return NULL; | |
400 } | |
401 | |
402 | |
237 static char parsers_doc[] = "Efficient content parsing."; | 403 static char parsers_doc[] = "Efficient content parsing."; |
238 | 404 |
239 static PyMethodDef methods[] = { | 405 static PyMethodDef methods[] = { |
240 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, | 406 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, |
241 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, | 407 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, |
408 {"parse_index", parse_index, METH_VARARGS, "parse a revlog index\n"}, | |
242 {NULL, NULL} | 409 {NULL, NULL} |
243 }; | 410 }; |
244 | 411 |
245 PyMODINIT_FUNC initparsers(void) | 412 PyMODINIT_FUNC initparsers(void) |
246 { | 413 { |