/*
 mpatch.c - efficient binary patching for Mercurial

 This implements a patch algorithm that's O(m + n log n) where m is the
 size of the output and n is the number of patches.

 Given a list of binary patches, it unpacks each into a hunk list,
 then combines the hunk lists with a treewise recursion to form a
 single hunk list. This hunk list is then applied to the original
 text.

 The text (or binary) fragments are copied directly from their source
 Python objects into a preallocated output string to avoid the
 allocation of intermediate Python objects. Working memory is about 2x
 the total number of hunks.

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
*/
|
22 |
|
23 #include <Python.h> |
|
24 #include <stdlib.h> |
|
25 #include <string.h> |
|
26 #include <netinet/in.h> |
|
27 #include <sys/types.h> |
|
28 |
|
/* module docstring handed to Py_InitModule3() */
static char mpatch_doc[] = "Efficient binary patching.";
|
30 |
|
/* a single hunk: when applied, bytes [start, end) of the source text
   are replaced by len bytes read from data */
struct frag {
	int start;	/* source offset where the replaced range begins */
	int end;	/* source offset just past the replaced range */
	int len;	/* number of replacement bytes at data */
	char *data;	/* replacement bytes (not owned by the hunk) */
};
|
35 |
|
/* a list of hunks stored in one contiguous array */
struct flist {
	struct frag *base;	/* start of the allocation, released by lfree() */
	struct frag *head;	/* first hunk still in the list */
	struct frag *tail;	/* one past the last hunk in the list */
};
|
39 |
|
40 static struct flist *lalloc(int size) |
|
41 { |
128
|
42 struct flist *a = NULL; |
72
|
43 |
|
44 a = malloc(sizeof(struct flist)); |
128
|
45 if (a) { |
|
46 a->base = malloc(sizeof(struct frag) * size); |
|
47 if (!a->base) |
|
48 free(a); |
|
49 else |
|
50 a->head = a->tail = a->base; |
|
51 } |
72
|
52 return a; |
|
53 } |
|
54 |
|
55 static void lfree(struct flist *a) |
|
56 { |
128
|
57 if (a) { |
|
58 free(a->base); |
|
59 free(a); |
|
60 } |
72
|
61 } |
|
62 |
|
63 static int lsize(struct flist *a) |
|
64 { |
|
65 return a->tail - a->head; |
|
66 } |
|
67 |
|
68 /* move hunks in source that are less cut to dest, compensating |
|
69 for changes in offset. the last hunk may be split if necessary. |
|
70 */ |
|
71 static int gather(struct flist *dest, struct flist *src, int cut, int offset) |
|
72 { |
|
73 struct frag *d = dest->tail, *s = src->head; |
|
74 int postend, c, l; |
|
75 |
|
76 while (s != src->tail) { |
|
77 if (s->start + offset >= cut) |
82
|
78 break; /* we've gone far enough */ |
72
|
79 |
|
80 postend = offset + s->start + s->len; |
|
81 if (postend <= cut) { |
|
82 /* save this hunk */ |
|
83 offset += s->start + s->len - s->end; |
|
84 *d++ = *s++; |
|
85 } |
|
86 else { |
|
87 /* break up this hunk */ |
|
88 c = cut - offset; |
|
89 if (s->end < c) |
|
90 c = s->end; |
|
91 l = cut - offset - s->start; |
|
92 if (s->len < l) |
|
93 l = s->len; |
|
94 |
|
95 offset += s->start + l - c; |
|
96 |
|
97 d->start = s->start; |
|
98 d->end = c; |
|
99 d->len = l; |
|
100 d->data = s->data; |
|
101 d++; |
|
102 s->start = c; |
|
103 s->len = s->len - l; |
|
104 s->data = s->data + l; |
|
105 |
82
|
106 break; |
72
|
107 } |
|
108 } |
|
109 |
|
110 dest->tail = d; |
|
111 src->head = s; |
|
112 return offset; |
|
113 } |
|
114 |
|
115 /* like gather, but with no output list */ |
|
116 static int discard(struct flist *src, int cut, int offset) |
|
117 { |
|
118 struct frag *s = src->head; |
|
119 int postend, c, l; |
|
120 |
|
121 while (s != src->tail) { |
|
122 if (s->start + offset >= cut) |
82
|
123 break; |
72
|
124 |
|
125 postend = offset + s->start + s->len; |
|
126 if (postend <= cut) { |
|
127 offset += s->start + s->len - s->end; |
|
128 s++; |
|
129 } |
|
130 else { |
|
131 c = cut - offset; |
|
132 if (s->end < c) |
|
133 c = s->end; |
|
134 l = cut - offset - s->start; |
|
135 if (s->len < l) |
|
136 l = s->len; |
|
137 |
|
138 offset += s->start + l - c; |
|
139 s->start = c; |
|
140 s->len = s->len - l; |
|
141 s->data = s->data + l; |
|
142 |
82
|
143 break; |
72
|
144 } |
|
145 } |
|
146 |
|
147 src->head = s; |
|
148 return offset; |
|
149 } |
|
150 |
|
151 /* combine hunk lists a and b, while adjusting b for offset changes in a/ |
|
152 this deletes a and b and returns the resultant list. */ |
|
153 static struct flist *combine(struct flist *a, struct flist *b) |
|
154 { |
128
|
155 struct flist *c = NULL; |
|
156 struct frag *bh, *ct; |
72
|
157 int offset = 0, post; |
|
158 |
128
|
159 if (a && b) |
|
160 c = lalloc((lsize(a) + lsize(b)) * 2); |
|
161 |
|
162 if (c) { |
72
|
163 |
128
|
164 for (bh = b->head; bh != b->tail; bh++) { |
|
165 /* save old hunks */ |
|
166 offset = gather(c, a, bh->start, offset); |
72
|
167 |
128
|
168 /* discard replaced hunks */ |
|
169 post = discard(a, bh->end, offset); |
72
|
170 |
128
|
171 /* insert new hunk */ |
|
172 ct = c->tail; |
|
173 ct->start = bh->start - offset; |
|
174 ct->end = bh->end - post; |
|
175 ct->len = bh->len; |
|
176 ct->data = bh->data; |
|
177 c->tail++; |
|
178 offset = post; |
|
179 } |
|
180 |
|
181 /* hold on to tail from a */ |
|
182 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); |
|
183 c->tail += lsize(a); |
72
|
184 } |
|
185 |
|
186 lfree(a); |
|
187 lfree(b); |
|
188 return c; |
|
189 } |
|
190 |
|
191 /* decode a binary patch into a hunk list */ |
|
192 static struct flist *decode(char *bin, int len) |
|
193 { |
|
194 struct flist *l; |
|
195 struct frag *lt; |
|
196 char *end = bin + len; |
|
197 |
|
198 /* assume worst case size, we won't have many of these lists */ |
|
199 l = lalloc(len / 12); |
|
200 lt = l->tail; |
|
201 |
|
202 while (bin < end) { |
|
203 lt->start = ntohl(*(uint32_t *)bin); |
|
204 lt->end = ntohl(*(uint32_t *)(bin + 4)); |
|
205 lt->len = ntohl(*(uint32_t *)(bin + 8)); |
|
206 lt->data = bin + 12; |
|
207 bin += 12 + lt->len; |
|
208 lt++; |
|
209 } |
|
210 |
|
211 l->tail = lt; |
|
212 return l; |
|
213 } |
|
214 |
|
215 /* calculate the size of resultant text */ |
|
216 static int calcsize(int len, struct flist *l) |
|
217 { |
|
218 int outlen = 0, last = 0; |
|
219 struct frag *f = l->head; |
|
220 |
|
221 while (f != l->tail) { |
|
222 outlen += f->start - last; |
|
223 last = f->end; |
|
224 outlen += f->len; |
|
225 f++; |
|
226 } |
|
227 |
|
228 outlen += len - last; |
|
229 return outlen; |
|
230 } |
|
231 |
|
232 static void apply(char *buf, char *orig, int len, struct flist *l) |
|
233 { |
|
234 struct frag *f = l->head; |
|
235 int last = 0; |
|
236 char *p = buf; |
|
237 |
|
238 while (f != l->tail) { |
|
239 memcpy(p, orig + last, f->start - last); |
|
240 p += f->start - last; |
|
241 memcpy(p, f->data, f->len); |
|
242 last = f->end; |
|
243 p += f->len; |
|
244 f++; |
|
245 } |
|
246 memcpy(p, orig + last, len - last); |
|
247 } |
|
248 |
|
249 /* recursively generate a patch of all bins between start and end */ |
|
250 static struct flist *fold(PyObject *bins, int start, int end) |
|
251 { |
|
252 int len; |
|
253 |
|
254 if (start + 1 == end) { |
|
255 /* trivial case, output a decoded list */ |
|
256 PyObject *tmp = PyList_GetItem(bins, start); |
128
|
257 if (!tmp) |
|
258 return NULL; |
72
|
259 return decode(PyString_AsString(tmp), PyString_Size(tmp)); |
|
260 } |
|
261 |
|
262 /* divide and conquer, memory management is elsewhere */ |
|
263 len = (end - start) / 2; |
|
264 return combine(fold(bins, start, start + len), |
|
265 fold(bins, start + len, end)); |
|
266 } |
|
267 |
|
268 static PyObject * |
|
269 patches(PyObject *self, PyObject *args) |
|
270 { |
|
271 PyObject *text, *bins, *result; |
|
272 struct flist *patch; |
|
273 char *in, *out; |
|
274 int len, outlen; |
|
275 |
128
|
276 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins)) |
72
|
277 return NULL; |
|
278 |
|
279 len = PyList_Size(bins); |
|
280 if (!len) { |
|
281 /* nothing to do */ |
|
282 Py_INCREF(text); |
|
283 return text; |
|
284 } |
|
285 |
|
286 patch = fold(bins, 0, len); |
128
|
287 if (!patch) |
|
288 return PyErr_NoMemory(); |
|
289 |
72
|
290 outlen = calcsize(PyString_Size(text), patch); |
|
291 result = PyString_FromStringAndSize(NULL, outlen); |
128
|
292 if (result) { |
|
293 in = PyString_AsString(text); |
|
294 out = PyString_AsString(result); |
|
295 apply(out, in, PyString_Size(text), patch); |
|
296 } |
|
297 |
72
|
298 lfree(patch); |
|
299 return result; |
|
300 } |
|
301 |
|
302 static PyMethodDef methods[] = { |
|
303 {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, |
|
304 {NULL, NULL} |
|
305 }; |
|
306 |
|
307 PyMODINIT_FUNC |
|
308 initmpatch(void) |
|
309 { |
|
310 Py_InitModule3("mpatch", methods, mpatch_doc); |
|
311 } |
|
312 |