1 | /* |
2 | * Copyright 2004-2014 H2 Group. Multiple-Licensed under the MPL 2.0, |
3 | * and the EPL 1.0 (http://h2database.com/html/license.html). |
4 | * Initial Developer: H2 Group |
5 | */ |
6 | package org.h2.index; |
7 | |
8 | import org.h2.api.DatabaseEventListener; |
9 | import org.h2.api.ErrorCode; |
10 | import org.h2.engine.Session; |
11 | import org.h2.engine.SysProperties; |
12 | import org.h2.message.DbException; |
13 | import org.h2.result.SearchRow; |
14 | import org.h2.store.Data; |
15 | import org.h2.store.Page; |
16 | import org.h2.store.PageStore; |
17 | import org.h2.util.Utils; |
18 | |
19 | /** |
20 | * A b-tree node page that contains index data. Format: |
21 | * <ul> |
22 | * <li>page type: byte</li> |
23 | * <li>checksum: short</li> |
24 | * <li>parent page id (0 for root): int</li> |
25 | * <li>index id: varInt</li> |
26 | * <li>count of all children (-1 if not known): int</li> |
27 | * <li>entry count: short</li> |
28 | * <li>rightmost child page id: int</li> |
29 | * <li>entries (child page id: int, offset: short)</li> |
30 | * </ul> |
31 | * The row contains the largest key of the respective child, |
32 | * meaning row[0] contains the largest key of child[0]. |
33 | */ |
34 | public class PageBtreeNode extends PageBtree { |
35 | |
36 | private static final int CHILD_OFFSET_PAIR_LENGTH = 6; |
37 | private static final int MAX_KEY_LENGTH = 10; |
38 | |
39 | private final boolean pageStoreInternalCount; |
40 | |
41 | /** |
42 | * The page ids of the children. |
43 | */ |
44 | private int[] childPageIds; |
45 | |
46 | private int rowCountStored = UNKNOWN_ROWCOUNT; |
47 | |
48 | private int rowCount = UNKNOWN_ROWCOUNT; |
49 | |
50 | private PageBtreeNode(PageBtreeIndex index, int pageId, Data data) { |
51 | super(index, pageId, data); |
52 | this.pageStoreInternalCount = index.getDatabase(). |
53 | getSettings().pageStoreInternalCount; |
54 | } |
55 | |
56 | /** |
57 | * Read a b-tree node page. |
58 | * |
59 | * @param index the index |
60 | * @param data the data |
61 | * @param pageId the page id |
62 | * @return the page |
63 | */ |
64 | public static Page read(PageBtreeIndex index, Data data, int pageId) { |
65 | PageBtreeNode p = new PageBtreeNode(index, pageId, data); |
66 | p.read(); |
67 | return p; |
68 | } |
69 | |
70 | /** |
71 | * Create a new b-tree node page. |
72 | * |
73 | * @param index the index |
74 | * @param pageId the page id |
75 | * @param parentPageId the parent page id |
76 | * @return the page |
77 | */ |
78 | static PageBtreeNode create(PageBtreeIndex index, int pageId, |
79 | int parentPageId) { |
80 | PageBtreeNode p = new PageBtreeNode(index, pageId, index.getPageStore() |
81 | .createData()); |
82 | index.getPageStore().logUndo(p, null); |
83 | p.parentPageId = parentPageId; |
84 | p.writeHead(); |
85 | // 4 bytes for the rightmost child page id |
86 | p.start = p.data.length() + 4; |
87 | p.rows = SearchRow.EMPTY_ARRAY; |
88 | if (p.pageStoreInternalCount) { |
89 | p.rowCount = 0; |
90 | } |
91 | return p; |
92 | } |
93 | |
94 | private void read() { |
95 | data.reset(); |
96 | int type = data.readByte(); |
97 | data.readShortInt(); |
98 | this.parentPageId = data.readInt(); |
99 | onlyPosition = (type & Page.FLAG_LAST) == 0; |
100 | int indexId = data.readVarInt(); |
101 | if (indexId != index.getId()) { |
102 | throw DbException.get(ErrorCode.FILE_CORRUPTED_1, |
103 | "page:" + getPos() + " expected index:" + index.getId() + |
104 | "got:" + indexId); |
105 | } |
106 | rowCount = rowCountStored = data.readInt(); |
107 | entryCount = data.readShortInt(); |
108 | childPageIds = new int[entryCount + 1]; |
109 | childPageIds[entryCount] = data.readInt(); |
110 | rows = entryCount == 0 ? SearchRow.EMPTY_ARRAY : new SearchRow[entryCount]; |
111 | offsets = Utils.newIntArray(entryCount); |
112 | for (int i = 0; i < entryCount; i++) { |
113 | childPageIds[i] = data.readInt(); |
114 | offsets[i] = data.readShortInt(); |
115 | } |
116 | check(); |
117 | start = data.length(); |
118 | written = true; |
119 | } |
120 | |
121 | /** |
122 | * Add a row. If it is possible this method returns -1, otherwise |
123 | * the split point. It is always possible to add two rows. |
124 | * |
125 | * @param row the now to add |
126 | * @return the split point of this page, or -1 if no split is required |
127 | */ |
128 | private int addChildTry(SearchRow row) { |
129 | if (entryCount < 4) { |
130 | return -1; |
131 | } |
132 | int startData; |
133 | if (onlyPosition) { |
134 | // if we only store the position, we may at most store as many |
135 | // entries as there is space for keys, because the current data area |
136 | // might get larger when _removing_ a child (if the new key needs |
137 | // more space) - and removing a child can't split this page |
138 | startData = entryCount + 1 * MAX_KEY_LENGTH; |
139 | } else { |
140 | int rowLength = index.getRowSize(data, row, onlyPosition); |
141 | int pageSize = index.getPageStore().getPageSize(); |
142 | int last = entryCount == 0 ? pageSize : offsets[entryCount - 1]; |
143 | startData = last - rowLength; |
144 | } |
145 | if (startData < start + CHILD_OFFSET_PAIR_LENGTH) { |
146 | return entryCount / 2; |
147 | } |
148 | return -1; |
149 | } |
150 | |
151 | /** |
152 | * Add a child at the given position. |
153 | * |
154 | * @param x the position |
155 | * @param childPageId the child |
156 | * @param row the row smaller than the first row of the child and its |
157 | * children |
158 | */ |
159 | private void addChild(int x, int childPageId, SearchRow row) { |
160 | int rowLength = index.getRowSize(data, row, onlyPosition); |
161 | int pageSize = index.getPageStore().getPageSize(); |
162 | int last = entryCount == 0 ? pageSize : offsets[entryCount - 1]; |
163 | if (last - rowLength < start + CHILD_OFFSET_PAIR_LENGTH) { |
164 | readAllRows(); |
165 | onlyPosition = true; |
166 | // change the offsets (now storing only positions) |
167 | int o = pageSize; |
168 | for (int i = 0; i < entryCount; i++) { |
169 | o -= index.getRowSize(data, getRow(i), true); |
170 | offsets[i] = o; |
171 | } |
172 | last = entryCount == 0 ? pageSize : offsets[entryCount - 1]; |
173 | rowLength = index.getRowSize(data, row, true); |
174 | if (SysProperties.CHECK && last - rowLength < |
175 | start + CHILD_OFFSET_PAIR_LENGTH) { |
176 | throw DbException.throwInternalError(); |
177 | } |
178 | } |
179 | int offset = last - rowLength; |
180 | if (entryCount > 0) { |
181 | if (x < entryCount) { |
182 | offset = (x == 0 ? pageSize : offsets[x - 1]) - rowLength; |
183 | } |
184 | } |
185 | rows = insert(rows, entryCount, x, row); |
186 | offsets = insert(offsets, entryCount, x, offset); |
187 | add(offsets, x + 1, entryCount + 1, -rowLength); |
188 | childPageIds = insert(childPageIds, entryCount + 1, x + 1, childPageId); |
189 | start += CHILD_OFFSET_PAIR_LENGTH; |
190 | if (pageStoreInternalCount) { |
191 | if (rowCount != UNKNOWN_ROWCOUNT) { |
192 | rowCount += offset; |
193 | } |
194 | } |
195 | entryCount++; |
196 | written = false; |
197 | changeCount = index.getPageStore().getChangeCount(); |
198 | } |
199 | |
200 | @Override |
201 | int addRowTry(SearchRow row) { |
202 | while (true) { |
203 | int x = find(row, false, true, true); |
204 | PageBtree page = index.getPage(childPageIds[x]); |
205 | int splitPoint = page.addRowTry(row); |
206 | if (splitPoint == -1) { |
207 | break; |
208 | } |
209 | SearchRow pivot = page.getRow(splitPoint - 1); |
210 | index.getPageStore().logUndo(this, data); |
211 | int splitPoint2 = addChildTry(pivot); |
212 | if (splitPoint2 != -1) { |
213 | return splitPoint2; |
214 | } |
215 | PageBtree page2 = page.split(splitPoint); |
216 | readAllRows(); |
217 | addChild(x, page2.getPos(), pivot); |
218 | index.getPageStore().update(page); |
219 | index.getPageStore().update(page2); |
220 | index.getPageStore().update(this); |
221 | } |
222 | updateRowCount(1); |
223 | written = false; |
224 | changeCount = index.getPageStore().getChangeCount(); |
225 | return -1; |
226 | } |
227 | |
228 | private void updateRowCount(int offset) { |
229 | if (rowCount != UNKNOWN_ROWCOUNT) { |
230 | rowCount += offset; |
231 | } |
232 | if (rowCountStored != UNKNOWN_ROWCOUNT) { |
233 | rowCountStored = UNKNOWN_ROWCOUNT; |
234 | index.getPageStore().logUndo(this, data); |
235 | if (written) { |
236 | writeHead(); |
237 | } |
238 | index.getPageStore().update(this); |
239 | } |
240 | } |
241 | |
242 | @Override |
243 | PageBtree split(int splitPoint) { |
244 | int newPageId = index.getPageStore().allocatePage(); |
245 | PageBtreeNode p2 = PageBtreeNode.create(index, newPageId, parentPageId); |
246 | index.getPageStore().logUndo(this, data); |
247 | if (onlyPosition) { |
248 | // TODO optimize: maybe not required |
249 | p2.onlyPosition = true; |
250 | } |
251 | int firstChild = childPageIds[splitPoint]; |
252 | readAllRows(); |
253 | for (int i = splitPoint; i < entryCount;) { |
254 | p2.addChild(p2.entryCount, childPageIds[splitPoint + 1], getRow(splitPoint)); |
255 | removeChild(splitPoint); |
256 | } |
257 | int lastChild = childPageIds[splitPoint - 1]; |
258 | removeChild(splitPoint - 1); |
259 | childPageIds[splitPoint - 1] = lastChild; |
260 | if (p2.childPageIds == null) { |
261 | p2.childPageIds = new int[1]; |
262 | } |
263 | p2.childPageIds[0] = firstChild; |
264 | p2.remapChildren(); |
265 | return p2; |
266 | } |
267 | |
268 | @Override |
269 | protected void remapChildren() { |
270 | for (int i = 0; i < entryCount + 1; i++) { |
271 | int child = childPageIds[i]; |
272 | PageBtree p = index.getPage(child); |
273 | p.setParentPageId(getPos()); |
274 | index.getPageStore().update(p); |
275 | } |
276 | } |
277 | |
278 | /** |
279 | * Initialize the page. |
280 | * |
281 | * @param page1 the first child page |
282 | * @param pivot the pivot key |
283 | * @param page2 the last child page |
284 | */ |
285 | void init(PageBtree page1, SearchRow pivot, PageBtree page2) { |
286 | entryCount = 0; |
287 | childPageIds = new int[] { page1.getPos() }; |
288 | rows = SearchRow.EMPTY_ARRAY; |
289 | offsets = Utils.EMPTY_INT_ARRAY; |
290 | addChild(0, page2.getPos(), pivot); |
291 | if (pageStoreInternalCount) { |
292 | rowCount = page1.getRowCount() + page2.getRowCount(); |
293 | } |
294 | check(); |
295 | } |
296 | |
297 | @Override |
298 | void find(PageBtreeCursor cursor, SearchRow first, boolean bigger) { |
299 | int i = find(first, bigger, false, false); |
300 | if (i > entryCount) { |
301 | if (parentPageId == PageBtree.ROOT) { |
302 | return; |
303 | } |
304 | PageBtreeNode next = (PageBtreeNode) index.getPage(parentPageId); |
305 | next.find(cursor, first, bigger); |
306 | return; |
307 | } |
308 | PageBtree page = index.getPage(childPageIds[i]); |
309 | page.find(cursor, first, bigger); |
310 | } |
311 | |
312 | @Override |
313 | void last(PageBtreeCursor cursor) { |
314 | int child = childPageIds[entryCount]; |
315 | index.getPage(child).last(cursor); |
316 | } |
317 | |
318 | @Override |
319 | PageBtreeLeaf getFirstLeaf() { |
320 | int child = childPageIds[0]; |
321 | return index.getPage(child).getFirstLeaf(); |
322 | } |
323 | |
324 | @Override |
325 | PageBtreeLeaf getLastLeaf() { |
326 | int child = childPageIds[entryCount]; |
327 | return index.getPage(child).getLastLeaf(); |
328 | } |
329 | |
330 | @Override |
331 | SearchRow remove(SearchRow row) { |
332 | int at = find(row, false, false, true); |
333 | // merge is not implemented to allow concurrent usage |
334 | // TODO maybe implement merge |
335 | PageBtree page = index.getPage(childPageIds[at]); |
336 | SearchRow last = page.remove(row); |
337 | index.getPageStore().logUndo(this, data); |
338 | updateRowCount(-1); |
339 | written = false; |
340 | changeCount = index.getPageStore().getChangeCount(); |
341 | if (last == null) { |
342 | // the last row didn't change - nothing to do |
343 | return null; |
344 | } else if (last == row) { |
345 | // this child is now empty |
346 | index.getPageStore().free(page.getPos()); |
347 | if (entryCount < 1) { |
348 | // no more children - this page is empty as well |
349 | return row; |
350 | } |
351 | if (at == entryCount) { |
352 | // removing the last child |
353 | last = getRow(at - 1); |
354 | } else { |
355 | last = null; |
356 | } |
357 | removeChild(at); |
358 | index.getPageStore().update(this); |
359 | return last; |
360 | } |
361 | // the last row is in the last child |
362 | if (at == entryCount) { |
363 | return last; |
364 | } |
365 | int child = childPageIds[at]; |
366 | removeChild(at); |
367 | // TODO this can mean only the position is now stored |
368 | // should split at the next possible moment |
369 | addChild(at, child, last); |
370 | // remove and add swapped two children, fix that |
371 | int temp = childPageIds[at]; |
372 | childPageIds[at] = childPageIds[at + 1]; |
373 | childPageIds[at + 1] = temp; |
374 | index.getPageStore().update(this); |
375 | return null; |
376 | } |
377 | |
378 | @Override |
379 | int getRowCount() { |
380 | if (rowCount == UNKNOWN_ROWCOUNT) { |
381 | int count = 0; |
382 | for (int i = 0; i < entryCount + 1; i++) { |
383 | int child = childPageIds[i]; |
384 | PageBtree page = index.getPage(child); |
385 | count += page.getRowCount(); |
386 | index.getDatabase().setProgress( |
387 | DatabaseEventListener.STATE_SCAN_FILE, |
388 | index.getName(), count, Integer.MAX_VALUE); |
389 | } |
390 | rowCount = count; |
391 | } |
392 | return rowCount; |
393 | } |
394 | |
395 | @Override |
396 | void setRowCountStored(int rowCount) { |
397 | if (rowCount < 0 && pageStoreInternalCount) { |
398 | return; |
399 | } |
400 | this.rowCount = rowCount; |
401 | if (rowCountStored != rowCount) { |
402 | rowCountStored = rowCount; |
403 | index.getPageStore().logUndo(this, data); |
404 | if (written) { |
405 | changeCount = index.getPageStore().getChangeCount(); |
406 | writeHead(); |
407 | } |
408 | index.getPageStore().update(this); |
409 | } |
410 | } |
411 | |
412 | private void check() { |
413 | if (SysProperties.CHECK) { |
414 | for (int i = 0; i < entryCount + 1; i++) { |
415 | int child = childPageIds[i]; |
416 | if (child == 0) { |
417 | DbException.throwInternalError(); |
418 | } |
419 | } |
420 | } |
421 | } |
422 | |
423 | @Override |
424 | public void write() { |
425 | check(); |
426 | writeData(); |
427 | index.getPageStore().writePage(getPos(), data); |
428 | } |
429 | |
430 | private void writeHead() { |
431 | data.reset(); |
432 | data.writeByte((byte) (Page.TYPE_BTREE_NODE | |
433 | (onlyPosition ? 0 : Page.FLAG_LAST))); |
434 | data.writeShortInt(0); |
435 | data.writeInt(parentPageId); |
436 | data.writeVarInt(index.getId()); |
437 | data.writeInt(rowCountStored); |
438 | data.writeShortInt(entryCount); |
439 | } |
440 | |
441 | private void writeData() { |
442 | if (written) { |
443 | return; |
444 | } |
445 | readAllRows(); |
446 | writeHead(); |
447 | data.writeInt(childPageIds[entryCount]); |
448 | for (int i = 0; i < entryCount; i++) { |
449 | data.writeInt(childPageIds[i]); |
450 | data.writeShortInt(offsets[i]); |
451 | } |
452 | for (int i = 0; i < entryCount; i++) { |
453 | index.writeRow(data, offsets[i], rows[i], onlyPosition); |
454 | } |
455 | written = true; |
456 | } |
457 | |
458 | @Override |
459 | void freeRecursive() { |
460 | index.getPageStore().logUndo(this, data); |
461 | index.getPageStore().free(getPos()); |
462 | for (int i = 0; i < entryCount + 1; i++) { |
463 | int child = childPageIds[i]; |
464 | index.getPage(child).freeRecursive(); |
465 | } |
466 | } |
467 | |
468 | private void removeChild(int i) { |
469 | readAllRows(); |
470 | entryCount--; |
471 | if (pageStoreInternalCount) { |
472 | updateRowCount(-index.getPage(childPageIds[i]).getRowCount()); |
473 | } |
474 | written = false; |
475 | changeCount = index.getPageStore().getChangeCount(); |
476 | if (entryCount < 0) { |
477 | DbException.throwInternalError(); |
478 | } |
479 | if (entryCount > i) { |
480 | int startNext = i > 0 ? offsets[i - 1] : index.getPageStore().getPageSize(); |
481 | int rowLength = startNext - offsets[i]; |
482 | add(offsets, i, entryCount + 1, rowLength); |
483 | } |
484 | rows = remove(rows, entryCount + 1, i); |
485 | offsets = remove(offsets, entryCount + 1, i); |
486 | childPageIds = remove(childPageIds, entryCount + 2, i); |
487 | start -= CHILD_OFFSET_PAIR_LENGTH; |
488 | } |
489 | |
490 | /** |
491 | * Set the cursor to the first row of the next page. |
492 | * |
493 | * @param cursor the cursor |
494 | * @param pageId id of the next page |
495 | */ |
496 | void nextPage(PageBtreeCursor cursor, int pageId) { |
497 | int i; |
498 | // TODO maybe keep the index in the child page (transiently) |
499 | for (i = 0; i < entryCount + 1; i++) { |
500 | if (childPageIds[i] == pageId) { |
501 | i++; |
502 | break; |
503 | } |
504 | } |
505 | if (i > entryCount) { |
506 | if (parentPageId == PageBtree.ROOT) { |
507 | cursor.setCurrent(null, 0); |
508 | return; |
509 | } |
510 | PageBtreeNode next = (PageBtreeNode) index.getPage(parentPageId); |
511 | next.nextPage(cursor, getPos()); |
512 | return; |
513 | } |
514 | PageBtree page = index.getPage(childPageIds[i]); |
515 | PageBtreeLeaf leaf = page.getFirstLeaf(); |
516 | cursor.setCurrent(leaf, 0); |
517 | } |
518 | |
519 | /** |
520 | * Set the cursor to the last row of the previous page. |
521 | * |
522 | * @param cursor the cursor |
523 | * @param pageId id of the previous page |
524 | */ |
525 | void previousPage(PageBtreeCursor cursor, int pageId) { |
526 | int i; |
527 | // TODO maybe keep the index in the child page (transiently) |
528 | for (i = entryCount; i >= 0; i--) { |
529 | if (childPageIds[i] == pageId) { |
530 | i--; |
531 | break; |
532 | } |
533 | } |
534 | if (i < 0) { |
535 | if (parentPageId == PageBtree.ROOT) { |
536 | cursor.setCurrent(null, 0); |
537 | return; |
538 | } |
539 | PageBtreeNode previous = (PageBtreeNode) index.getPage(parentPageId); |
540 | previous.previousPage(cursor, getPos()); |
541 | return; |
542 | } |
543 | PageBtree page = index.getPage(childPageIds[i]); |
544 | PageBtreeLeaf leaf = page.getLastLeaf(); |
545 | cursor.setCurrent(leaf, leaf.entryCount - 1); |
546 | } |
547 | |
548 | |
549 | @Override |
550 | public String toString() { |
551 | return "page[" + getPos() + "] b-tree node table:" + |
552 | index.getId() + " entries:" + entryCount; |
553 | } |
554 | |
555 | @Override |
556 | public void moveTo(Session session, int newPos) { |
557 | PageStore store = index.getPageStore(); |
558 | store.logUndo(this, data); |
559 | PageBtreeNode p2 = PageBtreeNode.create(index, newPos, parentPageId); |
560 | readAllRows(); |
561 | p2.rowCountStored = rowCountStored; |
562 | p2.rowCount = rowCount; |
563 | p2.childPageIds = childPageIds; |
564 | p2.rows = rows; |
565 | p2.entryCount = entryCount; |
566 | p2.offsets = offsets; |
567 | p2.onlyPosition = onlyPosition; |
568 | p2.parentPageId = parentPageId; |
569 | p2.start = start; |
570 | store.update(p2); |
571 | if (parentPageId == ROOT) { |
572 | index.setRootPageId(session, newPos); |
573 | } else { |
574 | Page p = store.getPage(parentPageId); |
575 | if (!(p instanceof PageBtreeNode)) { |
576 | throw DbException.throwInternalError(); |
577 | } |
578 | PageBtreeNode n = (PageBtreeNode) p; |
579 | n.moveChild(getPos(), newPos); |
580 | } |
581 | for (int i = 0; i < entryCount + 1; i++) { |
582 | int child = childPageIds[i]; |
583 | PageBtree p = index.getPage(child); |
584 | p.setParentPageId(newPos); |
585 | store.update(p); |
586 | } |
587 | store.free(getPos()); |
588 | } |
589 | |
590 | /** |
591 | * One of the children has moved to a new page. |
592 | * |
593 | * @param oldPos the old position |
594 | * @param newPos the new position |
595 | */ |
596 | void moveChild(int oldPos, int newPos) { |
597 | for (int i = 0; i < entryCount + 1; i++) { |
598 | if (childPageIds[i] == oldPos) { |
599 | index.getPageStore().logUndo(this, data); |
600 | written = false; |
601 | changeCount = index.getPageStore().getChangeCount(); |
602 | childPageIds[i] = newPos; |
603 | index.getPageStore().update(this); |
604 | return; |
605 | } |
606 | } |
607 | throw DbException.throwInternalError(); |
608 | } |
609 | |
610 | } |