1 | /* |
2 | * Copyright 2004-2014 H2 Group. Multiple-Licensed under the MPL 2.0, |
3 | * and the EPL 1.0 (http://h2database.com/html/license.html). |
4 | * Initial Developer: H2 Group |
5 | */ |
6 | package org.h2.fulltext; |
7 | |
8 | import java.io.IOException; |
9 | import java.io.Reader; |
10 | import java.io.StreamTokenizer; |
11 | import java.sql.Clob; |
12 | import java.sql.Connection; |
13 | import java.sql.DatabaseMetaData; |
14 | import java.sql.PreparedStatement; |
15 | import java.sql.ResultSet; |
16 | import java.sql.SQLException; |
17 | import java.sql.Statement; |
18 | import java.sql.Types; |
19 | import java.util.ArrayList; |
20 | import java.util.Arrays; |
21 | import java.util.HashMap; |
22 | import java.util.HashSet; |
23 | import java.util.Iterator; |
24 | import java.util.StringTokenizer; |
25 | import java.util.UUID; |
26 | import org.h2.api.Trigger; |
27 | import org.h2.command.Parser; |
28 | import org.h2.engine.Session; |
29 | import org.h2.expression.Comparison; |
30 | import org.h2.expression.ConditionAndOr; |
31 | import org.h2.expression.Expression; |
32 | import org.h2.expression.ExpressionColumn; |
33 | import org.h2.expression.ValueExpression; |
34 | import org.h2.jdbc.JdbcConnection; |
35 | import org.h2.message.DbException; |
36 | import org.h2.tools.SimpleResultSet; |
37 | import org.h2.util.IOUtils; |
38 | import org.h2.util.New; |
39 | import org.h2.util.StatementBuilder; |
40 | import org.h2.util.StringUtils; |
41 | |
42 | /** |
43 | * This class implements the native full text search. |
44 | * Most methods can be called using SQL statements as well. |
45 | */ |
46 | public class FullText { |
47 | |
48 | /** |
49 | * A column name of the result set returned by the searchData method. |
50 | */ |
51 | private static final String FIELD_SCHEMA = "SCHEMA"; |
52 | |
53 | /** |
54 | * A column name of the result set returned by the searchData method. |
55 | */ |
56 | private static final String FIELD_TABLE = "TABLE"; |
57 | |
58 | /** |
59 | * A column name of the result set returned by the searchData method. |
60 | */ |
61 | private static final String FIELD_COLUMNS = "COLUMNS"; |
62 | |
63 | /** |
64 | * A column name of the result set returned by the searchData method. |
65 | */ |
66 | private static final String FIELD_KEYS = "KEYS"; |
67 | |
68 | /** |
69 | * The hit score. |
70 | */ |
71 | private static final String FIELD_SCORE = "SCORE"; |
72 | |
73 | private static final String TRIGGER_PREFIX = "FT_"; |
74 | private static final String SCHEMA = "FT"; |
75 | private static final String SELECT_MAP_BY_WORD_ID = |
76 | "SELECT ROWID FROM " + SCHEMA + ".MAP WHERE WORDID=?"; |
77 | private static final String SELECT_ROW_BY_ID = |
78 | "SELECT KEY, INDEXID FROM " + SCHEMA + ".ROWS WHERE ID=?"; |
79 | |
80 | /** |
81 | * The column name of the result set returned by the search method. |
82 | */ |
83 | private static final String FIELD_QUERY = "QUERY"; |
84 | |
85 | /** |
86 | * Initializes full text search functionality for this database. This adds |
87 | * the following Java functions to the database: |
88 | * <ul> |
89 | * <li>FT_CREATE_INDEX(schemaNameString, tableNameString, |
90 | * columnListString)</li> |
91 | * <li>FT_SEARCH(queryString, limitInt, offsetInt): result set</li> |
92 | * <li>FT_REINDEX()</li> |
93 | * <li>FT_DROP_ALL()</li> |
94 | * </ul> |
95 | * It also adds a schema FT to the database where bookkeeping information |
96 | * is stored. This function may be called from a Java application, or by |
97 | * using the SQL statements: |
98 | * |
99 | * <pre> |
100 | * CREATE ALIAS IF NOT EXISTS FT_INIT FOR |
101 | * "org.h2.fulltext.FullText.init"; |
102 | * CALL FT_INIT(); |
103 | * </pre> |
104 | * |
105 | * @param conn the connection |
106 | */ |
107 | public static void init(Connection conn) throws SQLException { |
108 | Statement stat = conn.createStatement(); |
109 | stat.execute("CREATE SCHEMA IF NOT EXISTS " + SCHEMA); |
110 | stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + |
111 | ".INDEXES(ID INT AUTO_INCREMENT PRIMARY KEY, " + |
112 | "SCHEMA VARCHAR, TABLE VARCHAR, COLUMNS VARCHAR, " + |
113 | "UNIQUE(SCHEMA, TABLE))"); |
114 | stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + |
115 | ".WORDS(ID INT AUTO_INCREMENT PRIMARY KEY, " + |
116 | "NAME VARCHAR, UNIQUE(NAME))"); |
117 | stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + |
118 | ".ROWS(ID IDENTITY, HASH INT, INDEXID INT, " + |
119 | "KEY VARCHAR, UNIQUE(HASH, INDEXID, KEY))"); |
120 | stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + |
121 | ".MAP(ROWID INT, WORDID INT, PRIMARY KEY(WORDID, ROWID))"); |
122 | stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + |
123 | ".IGNORELIST(LIST VARCHAR)"); |
124 | stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + |
125 | ".SETTINGS(KEY VARCHAR PRIMARY KEY, VALUE VARCHAR)"); |
126 | stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + |
127 | FullText.class.getName() + ".createIndex\""); |
128 | stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_INDEX FOR \"" + |
129 | FullText.class.getName() + ".dropIndex\""); |
130 | stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + |
131 | FullText.class.getName() + ".search\""); |
132 | stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH_DATA FOR \"" + |
133 | FullText.class.getName() + ".searchData\""); |
134 | stat.execute("CREATE ALIAS IF NOT EXISTS FT_REINDEX FOR \"" + |
135 | FullText.class.getName() + ".reindex\""); |
136 | stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_ALL FOR \"" + |
137 | FullText.class.getName() + ".dropAll\""); |
138 | FullTextSettings setting = FullTextSettings.getInstance(conn); |
139 | ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA + |
140 | ".IGNORELIST"); |
141 | while (rs.next()) { |
142 | String commaSeparatedList = rs.getString(1); |
143 | setIgnoreList(setting, commaSeparatedList); |
144 | } |
145 | rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".SETTINGS"); |
146 | while (rs.next()) { |
147 | String key = rs.getString(1); |
148 | if ("whitespaceChars".equals(key)) { |
149 | String value = rs.getString(2); |
150 | setting.setWhitespaceChars(value); |
151 | } |
152 | } |
153 | rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".WORDS"); |
154 | HashMap<String, Integer> map = setting.getWordList(); |
155 | while (rs.next()) { |
156 | String word = rs.getString("NAME"); |
157 | int id = rs.getInt("ID"); |
158 | word = setting.convertWord(word); |
159 | if (word != null) { |
160 | map.put(word, id); |
161 | } |
162 | } |
163 | setting.setInitialized(true); |
164 | } |
165 | |
166 | /** |
167 | * Create a new full text index for a table and column list. Each table may |
168 | * only have one index at any time. |
169 | * |
170 | * @param conn the connection |
171 | * @param schema the schema name of the table (case sensitive) |
172 | * @param table the table name (case sensitive) |
173 | * @param columnList the column list (null for all columns) |
174 | */ |
175 | public static void createIndex(Connection conn, String schema, |
176 | String table, String columnList) throws SQLException { |
177 | init(conn); |
178 | PreparedStatement prep = conn.prepareStatement("INSERT INTO " + SCHEMA |
179 | + ".INDEXES(SCHEMA, TABLE, COLUMNS) VALUES(?, ?, ?)"); |
180 | prep.setString(1, schema); |
181 | prep.setString(2, table); |
182 | prep.setString(3, columnList); |
183 | prep.execute(); |
184 | createTrigger(conn, schema, table); |
185 | indexExistingRows(conn, schema, table); |
186 | } |
187 | |
188 | /** |
189 | * Re-creates the full text index for this database. Calling this method is |
190 | * usually not needed, as the index is kept up-to-date automatically. |
191 | * |
192 | * @param conn the connection |
193 | */ |
194 | public static void reindex(Connection conn) throws SQLException { |
195 | init(conn); |
196 | removeAllTriggers(conn, TRIGGER_PREFIX); |
197 | FullTextSettings setting = FullTextSettings.getInstance(conn); |
198 | setting.getWordList().clear(); |
199 | Statement stat = conn.createStatement(); |
200 | stat.execute("TRUNCATE TABLE " + SCHEMA + ".WORDS"); |
201 | stat.execute("TRUNCATE TABLE " + SCHEMA + ".ROWS"); |
202 | stat.execute("TRUNCATE TABLE " + SCHEMA + ".MAP"); |
203 | ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".INDEXES"); |
204 | while (rs.next()) { |
205 | String schema = rs.getString("SCHEMA"); |
206 | String table = rs.getString("TABLE"); |
207 | createTrigger(conn, schema, table); |
208 | indexExistingRows(conn, schema, table); |
209 | } |
210 | } |
211 | |
212 | /** |
213 | * Drop an existing full text index for a table. This method returns |
214 | * silently if no index for this table exists. |
215 | * |
216 | * @param conn the connection |
217 | * @param schema the schema name of the table (case sensitive) |
218 | * @param table the table name (case sensitive) |
219 | */ |
220 | public static void dropIndex(Connection conn, String schema, String table) |
221 | throws SQLException { |
222 | init(conn); |
223 | PreparedStatement prep = conn.prepareStatement("SELECT ID FROM " + SCHEMA |
224 | + ".INDEXES WHERE SCHEMA=? AND TABLE=?"); |
225 | prep.setString(1, schema); |
226 | prep.setString(2, table); |
227 | ResultSet rs = prep.executeQuery(); |
228 | if (!rs.next()) { |
229 | return; |
230 | } |
231 | int indexId = rs.getInt(1); |
232 | prep = conn.prepareStatement("DELETE FROM " + SCHEMA |
233 | + ".INDEXES WHERE ID=?"); |
234 | prep.setInt(1, indexId); |
235 | prep.execute(); |
236 | createOrDropTrigger(conn, schema, table, false); |
237 | prep = conn.prepareStatement("DELETE FROM " + SCHEMA + |
238 | ".ROWS WHERE INDEXID=? AND ROWNUM<10000"); |
239 | while (true) { |
240 | prep.setInt(1, indexId); |
241 | int deleted = prep.executeUpdate(); |
242 | if (deleted == 0) { |
243 | break; |
244 | } |
245 | } |
246 | prep = conn.prepareStatement("DELETE FROM " + SCHEMA + ".MAP M " + |
247 | "WHERE NOT EXISTS (SELECT * FROM " + SCHEMA + |
248 | ".ROWS R WHERE R.ID=M.ROWID) AND ROWID<10000"); |
249 | while (true) { |
250 | int deleted = prep.executeUpdate(); |
251 | if (deleted == 0) { |
252 | break; |
253 | } |
254 | } |
255 | } |
256 | |
257 | /** |
258 | * Drops all full text indexes from the database. |
259 | * |
260 | * @param conn the connection |
261 | */ |
262 | public static void dropAll(Connection conn) throws SQLException { |
263 | init(conn); |
264 | Statement stat = conn.createStatement(); |
265 | stat.execute("DROP SCHEMA IF EXISTS " + SCHEMA); |
266 | removeAllTriggers(conn, TRIGGER_PREFIX); |
267 | FullTextSettings setting = FullTextSettings.getInstance(conn); |
268 | setting.removeAllIndexes(); |
269 | setting.getIgnoreList().clear(); |
270 | setting.getWordList().clear(); |
271 | } |
272 | |
273 | /** |
274 | * Searches from the full text index for this database. |
275 | * The returned result set has the following column: |
276 | * <ul><li>QUERY (varchar): the query to use to get the data. |
277 | * The query does not include 'SELECT * FROM '. Example: |
278 | * PUBLIC.TEST WHERE ID = 1 |
279 | * </li><li>SCORE (float) the relevance score. This value is always 1.0 |
280 | * for the native fulltext search. |
281 | * </li></ul> |
282 | * |
283 | * @param conn the connection |
284 | * @param text the search query |
285 | * @param limit the maximum number of rows or 0 for no limit |
286 | * @param offset the offset or 0 for no offset |
287 | * @return the result set |
288 | */ |
289 | public static ResultSet search(Connection conn, String text, int limit, |
290 | int offset) throws SQLException { |
291 | try { |
292 | return search(conn, text, limit, offset, false); |
293 | } catch (DbException e) { |
294 | throw DbException.toSQLException(e); |
295 | } |
296 | } |
297 | |
298 | /** |
299 | * Searches from the full text index for this database. The result contains |
300 | * the primary key data as an array. The returned result set has the |
301 | * following columns: |
302 | * <ul> |
303 | * <li>SCHEMA (varchar): the schema name. Example: PUBLIC </li> |
304 | * <li>TABLE (varchar): the table name. Example: TEST </li> |
305 | * <li>COLUMNS (array of varchar): comma separated list of quoted column |
306 | * names. The column names are quoted if necessary. Example: (ID) </li> |
307 | * <li>KEYS (array of values): comma separated list of values. Example: (1) |
308 | * </li> |
309 | * <li>SCORE (float) the relevance score. This value is always 1.0 |
310 | * for the native fulltext search. |
311 | * </li> |
312 | * </ul> |
313 | * |
314 | * @param conn the connection |
315 | * @param text the search query |
316 | * @param limit the maximum number of rows or 0 for no limit |
317 | * @param offset the offset or 0 for no offset |
318 | * @return the result set |
319 | */ |
320 | public static ResultSet searchData(Connection conn, String text, int limit, |
321 | int offset) throws SQLException { |
322 | try { |
323 | return search(conn, text, limit, offset, true); |
324 | } catch (DbException e) { |
325 | throw DbException.toSQLException(e); |
326 | } |
327 | } |
328 | |
329 | /** |
330 | * Change the ignore list. The ignore list is a comma separated list of |
331 | * common words that must not be indexed. The default ignore list is empty. |
332 | * If indexes already exist at the time this list is changed, reindex must |
333 | * be called. |
334 | * |
335 | * @param conn the connection |
336 | * @param commaSeparatedList the list |
337 | */ |
338 | public static void setIgnoreList(Connection conn, String commaSeparatedList) |
339 | throws SQLException { |
340 | try { |
341 | init(conn); |
342 | FullTextSettings setting = FullTextSettings.getInstance(conn); |
343 | setIgnoreList(setting, commaSeparatedList); |
344 | Statement stat = conn.createStatement(); |
345 | stat.execute("TRUNCATE TABLE " + SCHEMA + ".IGNORELIST"); |
346 | PreparedStatement prep = conn.prepareStatement("INSERT INTO " + |
347 | SCHEMA + ".IGNORELIST VALUES(?)"); |
348 | prep.setString(1, commaSeparatedList); |
349 | prep.execute(); |
350 | } catch (DbException e) { |
351 | throw DbException.toSQLException(e); |
352 | } |
353 | } |
354 | |
355 | /** |
356 | * Change the whitespace characters. The whitespace characters are used to |
357 | * separate words. If indexes already exist at the time this list is |
358 | * changed, reindex must be called. |
359 | * |
360 | * @param conn the connection |
361 | * @param whitespaceChars the list of characters |
362 | */ |
363 | public static void setWhitespaceChars(Connection conn, |
364 | String whitespaceChars) throws SQLException { |
365 | try { |
366 | init(conn); |
367 | FullTextSettings setting = FullTextSettings.getInstance(conn); |
368 | setting.setWhitespaceChars(whitespaceChars); |
369 | PreparedStatement prep = conn.prepareStatement("MERGE INTO " + |
370 | SCHEMA + ".SETTINGS VALUES(?, ?)"); |
371 | prep.setString(1, "whitespaceChars"); |
372 | prep.setString(2, whitespaceChars); |
373 | prep.execute(); |
374 | } catch (DbException e) { |
375 | throw DbException.toSQLException(e); |
376 | } |
377 | } |
378 | |
379 | /** |
380 | * INTERNAL. |
381 | * Convert the object to a string. |
382 | * |
383 | * @param data the object |
384 | * @param type the SQL type |
385 | * @return the string |
386 | */ |
387 | protected static String asString(Object data, int type) throws SQLException { |
388 | if (data == null) { |
389 | return "NULL"; |
390 | } |
391 | switch (type) { |
392 | case Types.BIT: |
393 | case Types.BOOLEAN: |
394 | case Types.INTEGER: |
395 | case Types.BIGINT: |
396 | case Types.DECIMAL: |
397 | case Types.DOUBLE: |
398 | case Types.FLOAT: |
399 | case Types.NUMERIC: |
400 | case Types.REAL: |
401 | case Types.SMALLINT: |
402 | case Types.TINYINT: |
403 | case Types.DATE: |
404 | case Types.TIME: |
405 | case Types.TIMESTAMP: |
406 | case Types.LONGVARCHAR: |
407 | case Types.CHAR: |
408 | case Types.VARCHAR: |
409 | return data.toString(); |
410 | case Types.CLOB: |
411 | try { |
412 | if (data instanceof Clob) { |
413 | data = ((Clob) data).getCharacterStream(); |
414 | } |
415 | return IOUtils.readStringAndClose((Reader) data, -1); |
416 | } catch (IOException e) { |
417 | throw DbException.toSQLException(e); |
418 | } |
419 | case Types.VARBINARY: |
420 | case Types.LONGVARBINARY: |
421 | case Types.BINARY: |
422 | case Types.JAVA_OBJECT: |
423 | case Types.OTHER: |
424 | case Types.BLOB: |
425 | case Types.STRUCT: |
426 | case Types.REF: |
427 | case Types.NULL: |
428 | case Types.ARRAY: |
429 | case Types.DATALINK: |
430 | case Types.DISTINCT: |
431 | throw throwException("Unsupported column data type: " + type); |
432 | default: |
433 | return ""; |
434 | } |
435 | } |
436 | |
437 | /** |
438 | * Create an empty search result and initialize the columns. |
439 | * |
440 | * @param data true if the result set should contain the primary key data as |
441 | * an array. |
442 | * @return the empty result set |
443 | */ |
444 | protected static SimpleResultSet createResultSet(boolean data) { |
445 | SimpleResultSet result = new SimpleResultSet(); |
446 | if (data) { |
447 | result.addColumn(FullText.FIELD_SCHEMA, Types.VARCHAR, 0, 0); |
448 | result.addColumn(FullText.FIELD_TABLE, Types.VARCHAR, 0, 0); |
449 | result.addColumn(FullText.FIELD_COLUMNS, Types.ARRAY, 0, 0); |
450 | result.addColumn(FullText.FIELD_KEYS, Types.ARRAY, 0, 0); |
451 | } else { |
452 | result.addColumn(FullText.FIELD_QUERY, Types.VARCHAR, 0, 0); |
453 | } |
454 | result.addColumn(FullText.FIELD_SCORE, Types.FLOAT, 0, 0); |
455 | return result; |
456 | } |
457 | |
458 | /** |
459 | * Parse a primary key condition into the primary key columns. |
460 | * |
461 | * @param conn the database connection |
462 | * @param key the primary key condition as a string |
463 | * @return an array containing the column name list and the data list |
464 | */ |
465 | protected static Object[][] parseKey(Connection conn, String key) { |
466 | ArrayList<String> columns = New.arrayList(); |
467 | ArrayList<String> data = New.arrayList(); |
468 | JdbcConnection c = (JdbcConnection) conn; |
469 | Session session = (Session) c.getSession(); |
470 | Parser p = new Parser(session); |
471 | Expression expr = p.parseExpression(key); |
472 | addColumnData(columns, data, expr); |
473 | Object[] col = new Object[columns.size()]; |
474 | columns.toArray(col); |
475 | Object[] dat = new Object[columns.size()]; |
476 | data.toArray(dat); |
477 | Object[][] columnData = { col, dat }; |
478 | return columnData; |
479 | } |
480 | |
481 | /** |
482 | * INTERNAL. |
483 | * Convert an object to a String as used in a SQL statement. |
484 | * |
485 | * @param data the object |
486 | * @param type the SQL type |
487 | * @return the SQL String |
488 | */ |
489 | protected static String quoteSQL(Object data, int type) throws SQLException { |
490 | if (data == null) { |
491 | return "NULL"; |
492 | } |
493 | switch (type) { |
494 | case Types.BIT: |
495 | case Types.BOOLEAN: |
496 | case Types.INTEGER: |
497 | case Types.BIGINT: |
498 | case Types.DECIMAL: |
499 | case Types.DOUBLE: |
500 | case Types.FLOAT: |
501 | case Types.NUMERIC: |
502 | case Types.REAL: |
503 | case Types.SMALLINT: |
504 | case Types.TINYINT: |
505 | return data.toString(); |
506 | case Types.DATE: |
507 | case Types.TIME: |
508 | case Types.TIMESTAMP: |
509 | case Types.LONGVARCHAR: |
510 | case Types.CHAR: |
511 | case Types.VARCHAR: |
512 | return quoteString(data.toString()); |
513 | case Types.VARBINARY: |
514 | case Types.LONGVARBINARY: |
515 | case Types.BINARY: |
516 | if (data instanceof UUID) { |
517 | return "'" + data.toString() + "'"; |
518 | } |
519 | return "'" + StringUtils.convertBytesToHex((byte[]) data) + "'"; |
520 | case Types.CLOB: |
521 | case Types.JAVA_OBJECT: |
522 | case Types.OTHER: |
523 | case Types.BLOB: |
524 | case Types.STRUCT: |
525 | case Types.REF: |
526 | case Types.NULL: |
527 | case Types.ARRAY: |
528 | case Types.DATALINK: |
529 | case Types.DISTINCT: |
530 | throw throwException("Unsupported key data type: " + type); |
531 | default: |
532 | return ""; |
533 | } |
534 | } |
535 | |
536 | /** |
537 | * Remove all triggers that start with the given prefix. |
538 | * |
539 | * @param conn the database connection |
540 | * @param prefix the prefix |
541 | */ |
542 | protected static void removeAllTriggers(Connection conn, String prefix) |
543 | throws SQLException { |
544 | Statement stat = conn.createStatement(); |
545 | ResultSet rs = stat.executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS"); |
546 | Statement stat2 = conn.createStatement(); |
547 | while (rs.next()) { |
548 | String schema = rs.getString("TRIGGER_SCHEMA"); |
549 | String name = rs.getString("TRIGGER_NAME"); |
550 | if (name.startsWith(prefix)) { |
551 | name = StringUtils.quoteIdentifier(schema) + "." + |
552 | StringUtils.quoteIdentifier(name); |
553 | stat2.execute("DROP TRIGGER " + name); |
554 | } |
555 | } |
556 | } |
557 | |
558 | /** |
559 | * Set the column indices of a set of keys. |
560 | * |
561 | * @param index the column indices (will be modified) |
562 | * @param keys the key list |
563 | * @param columns the column list |
564 | */ |
565 | protected static void setColumns(int[] index, ArrayList<String> keys, |
566 | ArrayList<String> columns) throws SQLException { |
567 | for (int i = 0, keySize = keys.size(); i < keySize; i++) { |
568 | String key = keys.get(i); |
569 | int found = -1; |
570 | int columnsSize = columns.size(); |
571 | for (int j = 0; found == -1 && j < columnsSize; j++) { |
572 | String column = columns.get(j); |
573 | if (column.equals(key)) { |
574 | found = j; |
575 | } |
576 | } |
577 | if (found < 0) { |
578 | throw throwException("Column not found: " + key); |
579 | } |
580 | index[i] = found; |
581 | } |
582 | } |
583 | |
584 | /** |
585 | * Do the search. |
586 | * |
587 | * @param conn the database connection |
588 | * @param text the query |
589 | * @param limit the limit |
590 | * @param offset the offset |
591 | * @param data whether the raw data should be returned |
592 | * @return the result set |
593 | */ |
594 | protected static ResultSet search(Connection conn, String text, int limit, |
595 | int offset, boolean data) throws SQLException { |
596 | SimpleResultSet result = createResultSet(data); |
597 | if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) { |
598 | // this is just to query the result set columns |
599 | return result; |
600 | } |
601 | if (text == null || text.trim().length() == 0) { |
602 | return result; |
603 | } |
604 | FullTextSettings setting = FullTextSettings.getInstance(conn); |
605 | if (!setting.isInitialized()) { |
606 | init(conn); |
607 | } |
608 | HashSet<String> words = New.hashSet(); |
609 | addWords(setting, words, text); |
610 | HashSet<Integer> rIds = null, lastRowIds = null; |
611 | HashMap<String, Integer> allWords = setting.getWordList(); |
612 | |
613 | PreparedStatement prepSelectMapByWordId = setting.prepare(conn, |
614 | SELECT_MAP_BY_WORD_ID); |
615 | for (String word : words) { |
616 | lastRowIds = rIds; |
617 | rIds = New.hashSet(); |
618 | Integer wId = allWords.get(word); |
619 | if (wId == null) { |
620 | continue; |
621 | } |
622 | prepSelectMapByWordId.setInt(1, wId.intValue()); |
623 | ResultSet rs = prepSelectMapByWordId.executeQuery(); |
624 | while (rs.next()) { |
625 | Integer rId = rs.getInt(1); |
626 | if (lastRowIds == null || lastRowIds.contains(rId)) { |
627 | rIds.add(rId); |
628 | } |
629 | } |
630 | } |
631 | if (rIds == null || rIds.size() == 0) { |
632 | return result; |
633 | } |
634 | PreparedStatement prepSelectRowById = setting.prepare(conn, SELECT_ROW_BY_ID); |
635 | int rowCount = 0; |
636 | for (int rowId : rIds) { |
637 | prepSelectRowById.setInt(1, rowId); |
638 | ResultSet rs = prepSelectRowById.executeQuery(); |
639 | if (!rs.next()) { |
640 | continue; |
641 | } |
642 | if (offset > 0) { |
643 | offset--; |
644 | } else { |
645 | String key = rs.getString(1); |
646 | int indexId = rs.getInt(2); |
647 | IndexInfo index = setting.getIndexInfo(indexId); |
648 | if (data) { |
649 | Object[][] columnData = parseKey(conn, key); |
650 | result.addRow( |
651 | index.schema, |
652 | index.table, |
653 | columnData[0], |
654 | columnData[1], |
655 | 1.0); |
656 | } else { |
657 | String query = StringUtils.quoteIdentifier(index.schema) + |
658 | "." + StringUtils.quoteIdentifier(index.table) + |
659 | " WHERE " + key; |
660 | result.addRow(query, 1.0); |
661 | } |
662 | rowCount++; |
663 | if (limit > 0 && rowCount >= limit) { |
664 | break; |
665 | } |
666 | } |
667 | } |
668 | return result; |
669 | } |
670 | |
671 | private static void addColumnData(ArrayList<String> columns, |
672 | ArrayList<String> data, Expression expr) { |
673 | if (expr instanceof ConditionAndOr) { |
674 | ConditionAndOr and = (ConditionAndOr) expr; |
675 | Expression left = and.getExpression(true); |
676 | Expression right = and.getExpression(false); |
677 | addColumnData(columns, data, left); |
678 | addColumnData(columns, data, right); |
679 | } else { |
680 | Comparison comp = (Comparison) expr; |
681 | ExpressionColumn ec = (ExpressionColumn) comp.getExpression(true); |
682 | ValueExpression ev = (ValueExpression) comp.getExpression(false); |
683 | String columnName = ec.getColumnName(); |
684 | columns.add(columnName); |
685 | if (ev == null) { |
686 | data.add(null); |
687 | } else { |
688 | data.add(ev.getValue(null).getString()); |
689 | } |
690 | } |
691 | } |
692 | |
693 | /** |
694 | * Add all words in the given text to the hash set. |
695 | * |
696 | * @param setting the fulltext settings |
697 | * @param set the hash set |
698 | * @param reader the reader |
699 | */ |
700 | protected static void addWords(FullTextSettings setting, |
701 | HashSet<String> set, Reader reader) { |
702 | StreamTokenizer tokenizer = new StreamTokenizer(reader); |
703 | tokenizer.resetSyntax(); |
704 | tokenizer.wordChars(' ' + 1, 255); |
705 | char[] whitespaceChars = setting.getWhitespaceChars().toCharArray(); |
706 | for (char ch : whitespaceChars) { |
707 | tokenizer.whitespaceChars(ch, ch); |
708 | } |
709 | try { |
710 | while (true) { |
711 | int token = tokenizer.nextToken(); |
712 | if (token == StreamTokenizer.TT_EOF) { |
713 | break; |
714 | } else if (token == StreamTokenizer.TT_WORD) { |
715 | String word = tokenizer.sval; |
716 | word = setting.convertWord(word); |
717 | if (word != null) { |
718 | set.add(word); |
719 | } |
720 | } |
721 | } |
722 | } catch (IOException e) { |
723 | throw DbException.convertIOException(e, "Tokenizer error"); |
724 | } |
725 | } |
726 | |
727 | /** |
728 | * Add all words in the given text to the hash set. |
729 | * |
730 | * @param setting the fulltext settings |
731 | * @param set the hash set |
732 | * @param text the text |
733 | */ |
734 | protected static void addWords(FullTextSettings setting, |
735 | HashSet<String> set, String text) { |
736 | String whitespaceChars = setting.getWhitespaceChars(); |
737 | StringTokenizer tokenizer = new StringTokenizer(text, whitespaceChars); |
738 | while (tokenizer.hasMoreTokens()) { |
739 | String word = tokenizer.nextToken(); |
740 | word = setting.convertWord(word); |
741 | if (word != null) { |
742 | set.add(word); |
743 | } |
744 | } |
745 | } |
746 | |
747 | /** |
748 | * Create the trigger. |
749 | * |
750 | * @param conn the database connection |
751 | * @param schema the schema name |
752 | * @param table the table name |
753 | */ |
754 | protected static void createTrigger(Connection conn, String schema, |
755 | String table) throws SQLException { |
756 | createOrDropTrigger(conn, schema, table, true); |
757 | } |
758 | |
759 | private static void createOrDropTrigger(Connection conn, |
760 | String schema, String table, boolean create) throws SQLException { |
761 | Statement stat = conn.createStatement(); |
762 | String trigger = StringUtils.quoteIdentifier(schema) + "." |
763 | + StringUtils.quoteIdentifier(TRIGGER_PREFIX + table); |
764 | stat.execute("DROP TRIGGER IF EXISTS " + trigger); |
765 | if (create) { |
766 | StringBuilder buff = new StringBuilder("CREATE TRIGGER IF NOT EXISTS "); |
767 | // needs to be called on rollback as well, because we use the init |
768 | // connection do to changes in the index (not the user connection) |
769 | buff.append(trigger). |
770 | append(" AFTER INSERT, UPDATE, DELETE, ROLLBACK ON "). |
771 | append(StringUtils.quoteIdentifier(schema)). |
772 | append('.'). |
773 | append(StringUtils.quoteIdentifier(table)). |
774 | append(" FOR EACH ROW CALL \""). |
775 | append(FullText.FullTextTrigger.class.getName()). |
776 | append('\"'); |
777 | stat.execute(buff.toString()); |
778 | } |
779 | } |
780 | |
781 | /** |
782 | * Add the existing data to the index. |
783 | * |
784 | * @param conn the database connection |
785 | * @param schema the schema name |
786 | * @param table the table name |
787 | */ |
788 | protected static void indexExistingRows(Connection conn, String schema, |
789 | String table) throws SQLException { |
790 | FullText.FullTextTrigger existing = new FullText.FullTextTrigger(); |
791 | existing.init(conn, schema, null, table, false, Trigger.INSERT); |
792 | String sql = "SELECT * FROM " + StringUtils.quoteIdentifier(schema) + |
793 | "." + StringUtils.quoteIdentifier(table); |
794 | ResultSet rs = conn.createStatement().executeQuery(sql); |
795 | int columnCount = rs.getMetaData().getColumnCount(); |
796 | while (rs.next()) { |
797 | Object[] row = new Object[columnCount]; |
798 | for (int i = 0; i < columnCount; i++) { |
799 | row[i] = rs.getObject(i + 1); |
800 | } |
801 | existing.fire(conn, null, row); |
802 | } |
803 | } |
804 | |
805 | private static String quoteString(String data) { |
806 | if (data.indexOf('\'') < 0) { |
807 | return "'" + data + "'"; |
808 | } |
809 | int len = data.length(); |
810 | StringBuilder buff = new StringBuilder(len + 2); |
811 | buff.append('\''); |
812 | for (int i = 0; i < len; i++) { |
813 | char ch = data.charAt(i); |
814 | if (ch == '\'') { |
815 | buff.append(ch); |
816 | } |
817 | buff.append(ch); |
818 | } |
819 | buff.append('\''); |
820 | return buff.toString(); |
821 | } |
822 | |
823 | private static void setIgnoreList(FullTextSettings setting, |
824 | String commaSeparatedList) { |
825 | String[] list = StringUtils.arraySplit(commaSeparatedList, ',', true); |
826 | HashSet<String> set = setting.getIgnoreList(); |
827 | for (String word : list) { |
828 | String converted = setting.convertWord(word); |
829 | if (converted != null) { |
830 | set.add(converted); |
831 | } |
832 | } |
833 | } |
834 | |
835 | /** |
836 | * Check if a the indexed columns of a row probably have changed. It may |
837 | * return true even if the change was minimal (for example from 0.0 to |
838 | * 0.00). |
839 | * |
840 | * @param oldRow the old row |
841 | * @param newRow the new row |
842 | * @param indexColumns the indexed columns |
843 | * @return true if the indexed columns don't match |
844 | */ |
845 | protected static boolean hasChanged(Object[] oldRow, Object[] newRow, |
846 | int[] indexColumns) { |
847 | for (int c : indexColumns) { |
848 | Object o = oldRow[c], n = newRow[c]; |
849 | if (o == null) { |
850 | if (n != null) { |
851 | return true; |
852 | } |
853 | } else if (!o.equals(n)) { |
854 | return true; |
855 | } |
856 | } |
857 | return false; |
858 | } |
859 | |
860 | /** |
861 | * Trigger updates the index when a inserting, updating, or deleting a row. |
862 | */ |
863 | public static class FullTextTrigger implements Trigger { |
864 | |
865 | protected FullTextSettings setting; |
866 | protected IndexInfo index; |
867 | protected int[] columnTypes; |
868 | protected PreparedStatement prepInsertWord, prepInsertRow, prepInsertMap; |
869 | protected PreparedStatement prepDeleteRow, prepDeleteMap; |
870 | protected PreparedStatement prepSelectRow; |
871 | |
872 | /** |
873 | * INTERNAL |
874 | */ |
875 | @Override |
876 | public void init(Connection conn, String schemaName, String triggerName, |
877 | String tableName, boolean before, int type) throws SQLException { |
878 | setting = FullTextSettings.getInstance(conn); |
879 | if (!setting.isInitialized()) { |
880 | FullText.init(conn); |
881 | } |
882 | ArrayList<String> keyList = New.arrayList(); |
883 | DatabaseMetaData meta = conn.getMetaData(); |
884 | ResultSet rs = meta.getColumns(null, |
885 | StringUtils.escapeMetaDataPattern(schemaName), |
886 | StringUtils.escapeMetaDataPattern(tableName), |
887 | null); |
888 | ArrayList<String> columnList = New.arrayList(); |
889 | while (rs.next()) { |
890 | columnList.add(rs.getString("COLUMN_NAME")); |
891 | } |
892 | columnTypes = new int[columnList.size()]; |
893 | index = new IndexInfo(); |
894 | index.schema = schemaName; |
895 | index.table = tableName; |
896 | index.columns = new String[columnList.size()]; |
897 | columnList.toArray(index.columns); |
898 | rs = meta.getColumns(null, |
899 | StringUtils.escapeMetaDataPattern(schemaName), |
900 | StringUtils.escapeMetaDataPattern(tableName), |
901 | null); |
902 | for (int i = 0; rs.next(); i++) { |
903 | columnTypes[i] = rs.getInt("DATA_TYPE"); |
904 | } |
905 | if (keyList.size() == 0) { |
906 | rs = meta.getPrimaryKeys(null, |
907 | StringUtils.escapeMetaDataPattern(schemaName), |
908 | tableName); |
909 | while (rs.next()) { |
910 | keyList.add(rs.getString("COLUMN_NAME")); |
911 | } |
912 | } |
913 | if (keyList.size() == 0) { |
914 | throw throwException("No primary key for table " + tableName); |
915 | } |
916 | ArrayList<String> indexList = New.arrayList(); |
917 | PreparedStatement prep = conn.prepareStatement( |
918 | "SELECT ID, COLUMNS FROM " + SCHEMA + ".INDEXES WHERE SCHEMA=? AND TABLE=?"); |
919 | prep.setString(1, schemaName); |
920 | prep.setString(2, tableName); |
921 | rs = prep.executeQuery(); |
922 | if (rs.next()) { |
923 | index.id = rs.getInt(1); |
924 | String columns = rs.getString(2); |
925 | if (columns != null) { |
926 | for (String s : StringUtils.arraySplit(columns, ',', true)) { |
927 | indexList.add(s); |
928 | } |
929 | } |
930 | } |
931 | if (indexList.size() == 0) { |
932 | indexList.addAll(columnList); |
933 | } |
934 | index.keys = new int[keyList.size()]; |
935 | setColumns(index.keys, keyList, columnList); |
936 | index.indexColumns = new int[indexList.size()]; |
937 | setColumns(index.indexColumns, indexList, columnList); |
938 | setting.addIndexInfo(index); |
939 | prepInsertWord = conn.prepareStatement( |
940 | "INSERT INTO " + SCHEMA + ".WORDS(NAME) VALUES(?)"); |
941 | prepInsertRow = conn.prepareStatement( |
942 | "INSERT INTO " + SCHEMA + ".ROWS(HASH, INDEXID, KEY) VALUES(?, ?, ?)"); |
943 | prepInsertMap = conn.prepareStatement( |
944 | "INSERT INTO " + SCHEMA + ".MAP(ROWID, WORDID) VALUES(?, ?)"); |
945 | prepDeleteRow = conn.prepareStatement( |
946 | "DELETE FROM " + SCHEMA + ".ROWS WHERE HASH=? AND INDEXID=? AND KEY=?"); |
947 | prepDeleteMap = conn.prepareStatement( |
948 | "DELETE FROM " + SCHEMA + ".MAP WHERE ROWID=? AND WORDID=?"); |
949 | prepSelectRow = conn.prepareStatement( |
950 | "SELECT ID FROM " + SCHEMA + ".ROWS WHERE HASH=? AND INDEXID=? AND KEY=?"); |
951 | } |
952 | |
953 | /** |
954 | * INTERNAL |
955 | */ |
956 | @Override |
957 | public void fire(Connection conn, Object[] oldRow, Object[] newRow) |
958 | throws SQLException { |
959 | if (oldRow != null) { |
960 | if (newRow != null) { |
961 | // update |
962 | if (hasChanged(oldRow, newRow, index.indexColumns)) { |
963 | delete(oldRow); |
964 | insert(newRow); |
965 | } |
966 | } else { |
967 | // delete |
968 | delete(oldRow); |
969 | } |
970 | } else if (newRow != null) { |
971 | // insert |
972 | insert(newRow); |
973 | } |
974 | } |
975 | |
976 | /** |
977 | * INTERNAL |
978 | */ |
979 | @Override |
980 | public void close() { |
981 | setting.removeIndexInfo(index); |
982 | } |
983 | |
984 | /** |
985 | * INTERNAL |
986 | */ |
987 | @Override |
988 | public void remove() { |
989 | setting.removeIndexInfo(index); |
990 | } |
991 | |
992 | /** |
993 | * Add a row to the index. |
994 | * |
995 | * @param row the row |
996 | */ |
997 | protected void insert(Object[] row) throws SQLException { |
998 | String key = getKey(row); |
999 | int hash = key.hashCode(); |
1000 | prepInsertRow.setInt(1, hash); |
1001 | prepInsertRow.setInt(2, index.id); |
1002 | prepInsertRow.setString(3, key); |
1003 | prepInsertRow.execute(); |
1004 | ResultSet rs = prepInsertRow.getGeneratedKeys(); |
1005 | rs.next(); |
1006 | int rowId = rs.getInt(1); |
1007 | prepInsertMap.setInt(1, rowId); |
1008 | int[] wordIds = getWordIds(row); |
1009 | for (int id : wordIds) { |
1010 | prepInsertMap.setInt(2, id); |
1011 | prepInsertMap.execute(); |
1012 | } |
1013 | } |
1014 | |
1015 | /** |
1016 | * Delete a row from the index. |
1017 | * |
1018 | * @param row the row |
1019 | */ |
1020 | protected void delete(Object[] row) throws SQLException { |
1021 | String key = getKey(row); |
1022 | int hash = key.hashCode(); |
1023 | prepSelectRow.setInt(1, hash); |
1024 | prepSelectRow.setInt(2, index.id); |
1025 | prepSelectRow.setString(3, key); |
1026 | ResultSet rs = prepSelectRow.executeQuery(); |
1027 | if (rs.next()) { |
1028 | int rowId = rs.getInt(1); |
1029 | prepDeleteMap.setInt(1, rowId); |
1030 | int[] wordIds = getWordIds(row); |
1031 | for (int id : wordIds) { |
1032 | prepDeleteMap.setInt(2, id); |
1033 | prepDeleteMap.executeUpdate(); |
1034 | } |
1035 | prepDeleteRow.setInt(1, hash); |
1036 | prepDeleteRow.setInt(2, index.id); |
1037 | prepDeleteRow.setString(3, key); |
1038 | prepDeleteRow.executeUpdate(); |
1039 | } |
1040 | } |
1041 | |
1042 | private int[] getWordIds(Object[] row) throws SQLException { |
1043 | HashSet<String> words = New.hashSet(); |
1044 | for (int idx : index.indexColumns) { |
1045 | int type = columnTypes[idx]; |
1046 | Object data = row[idx]; |
1047 | if (type == Types.CLOB && data != null) { |
1048 | Reader reader; |
1049 | if (data instanceof Reader) { |
1050 | reader = (Reader) data; |
1051 | } else { |
1052 | reader = ((Clob) data).getCharacterStream(); |
1053 | } |
1054 | addWords(setting, words, reader); |
1055 | } else { |
1056 | String string = asString(data, type); |
1057 | addWords(setting, words, string); |
1058 | } |
1059 | } |
1060 | HashMap<String, Integer> allWords = setting.getWordList(); |
1061 | int[] wordIds = new int[words.size()]; |
1062 | Iterator<String> it = words.iterator(); |
1063 | for (int i = 0; it.hasNext(); i++) { |
1064 | String word = it.next(); |
1065 | Integer wId = allWords.get(word); |
1066 | int wordId; |
1067 | if (wId == null) { |
1068 | prepInsertWord.setString(1, word); |
1069 | prepInsertWord.execute(); |
1070 | ResultSet rs = prepInsertWord.getGeneratedKeys(); |
1071 | rs.next(); |
1072 | wordId = rs.getInt(1); |
1073 | allWords.put(word, wordId); |
1074 | } else { |
1075 | wordId = wId.intValue(); |
1076 | } |
1077 | wordIds[i] = wordId; |
1078 | } |
1079 | Arrays.sort(wordIds); |
1080 | return wordIds; |
1081 | } |
1082 | |
1083 | private String getKey(Object[] row) throws SQLException { |
1084 | StatementBuilder buff = new StatementBuilder(); |
1085 | for (int columnIndex : index.keys) { |
1086 | buff.appendExceptFirst(" AND "); |
1087 | buff.append(StringUtils.quoteIdentifier(index.columns[columnIndex])); |
1088 | Object o = row[columnIndex]; |
1089 | if (o == null) { |
1090 | buff.append(" IS NULL"); |
1091 | } else { |
1092 | buff.append('=').append(quoteSQL(o, columnTypes[columnIndex])); |
1093 | } |
1094 | } |
1095 | return buff.toString(); |
1096 | } |
1097 | |
1098 | } |
1099 | |
1100 | /** |
1101 | * INTERNAL |
1102 | * Close all fulltext settings, freeing up memory. |
1103 | */ |
1104 | public static void closeAll() { |
1105 | FullTextSettings.closeAll(); |
1106 | } |
1107 | |
1108 | /** |
1109 | * Throw a SQLException with the given message. |
1110 | * |
1111 | * @param message the message |
1112 | * @return never returns normally |
1113 | * @throws SQLException the exception |
1114 | */ |
1115 | protected static SQLException throwException(String message) |
1116 | throws SQLException { |
1117 | throw new SQLException(message, "FULLTEXT"); |
1118 | } |
1119 | |
1120 | } |