1 | /* |
2 | * Copyright 2004-2014 H2 Group. Multiple-Licensed under the MPL 2.0, |
3 | * and the EPL 1.0 (http://h2database.com/html/license.html). |
4 | * Initial Developer: H2 Group |
5 | */ |
6 | package org.h2.bnf; |
7 | |
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;

import org.h2.bnf.context.DbContextRule;
import org.h2.tools.Csv;
import org.h2.util.New;
import org.h2.util.StringUtils;
import org.h2.util.Utils;
23 | |
24 | /** |
25 | * This class can read a file that is similar to BNF (Backus-Naur form). |
26 | * It is made specially to support SQL grammar. |
27 | */ |
28 | public class Bnf { |
29 | |
30 | /** |
31 | * The rule map. The key is lowercase, and all spaces |
32 | * are replaces with underscore. |
33 | */ |
34 | private final HashMap<String, RuleHead> ruleMap = New.hashMap(); |
35 | private String syntax; |
36 | private String currentToken; |
37 | private String[] tokens; |
38 | private char firstChar; |
39 | private int index; |
40 | private Rule lastRepeat; |
41 | private ArrayList<RuleHead> statements; |
42 | private String currentTopic; |
43 | |
44 | /** |
45 | * Create an instance using the grammar specified in the CSV file. |
46 | * |
47 | * @param csv if not specified, the help.csv is used |
48 | * @return a new instance |
49 | */ |
50 | public static Bnf getInstance(Reader csv) throws SQLException, IOException { |
51 | Bnf bnf = new Bnf(); |
52 | if (csv == null) { |
53 | byte[] data = Utils.getResource("/org/h2/res/help.csv"); |
54 | csv = new InputStreamReader(new ByteArrayInputStream(data)); |
55 | } |
56 | bnf.parse(csv); |
57 | return bnf; |
58 | } |
59 | |
60 | private void addFixedRule(String name, int fixedType) { |
61 | Rule rule = new RuleFixed(fixedType); |
62 | addRule(name, "Fixed", rule); |
63 | } |
64 | |
65 | private RuleHead addRule(String topic, String section, Rule rule) { |
66 | RuleHead head = new RuleHead(section, topic, rule); |
67 | String key = StringUtils.toLowerEnglish(topic.trim().replace(' ', '_')); |
68 | if (ruleMap.get(key) != null) { |
69 | throw new AssertionError("already exists: " + topic); |
70 | } |
71 | ruleMap.put(key, head); |
72 | return head; |
73 | } |
74 | |
75 | private void parse(Reader reader) throws SQLException, IOException { |
76 | Rule functions = null; |
77 | statements = New.arrayList(); |
78 | Csv csv = new Csv(); |
79 | csv.setLineCommentCharacter('#'); |
80 | ResultSet rs = csv.read(reader, null); |
81 | while (rs.next()) { |
82 | String section = rs.getString("SECTION").trim(); |
83 | if (section.startsWith("System")) { |
84 | continue; |
85 | } |
86 | String topic = rs.getString("TOPIC"); |
87 | syntax = rs.getString("SYNTAX").trim(); |
88 | currentTopic = section; |
89 | tokens = tokenize(); |
90 | index = 0; |
91 | Rule rule = parseRule(); |
92 | if (section.startsWith("Command")) { |
93 | rule = new RuleList(rule, new RuleElement(";\n\n", currentTopic), false); |
94 | } |
95 | RuleHead head = addRule(topic, section, rule); |
96 | if (section.startsWith("Function")) { |
97 | if (functions == null) { |
98 | functions = rule; |
99 | } else { |
100 | functions = new RuleList(rule, functions, true); |
101 | } |
102 | } else if (section.startsWith("Commands")) { |
103 | statements.add(head); |
104 | } |
105 | } |
106 | addRule("@func@", "Function", functions); |
107 | addFixedRule("@ymd@", RuleFixed.YMD); |
108 | addFixedRule("@hms@", RuleFixed.HMS); |
109 | addFixedRule("@nanos@", RuleFixed.NANOS); |
110 | addFixedRule("anything_except_single_quote", RuleFixed.ANY_EXCEPT_SINGLE_QUOTE); |
111 | addFixedRule("anything_except_double_quote", RuleFixed.ANY_EXCEPT_DOUBLE_QUOTE); |
112 | addFixedRule("anything_until_end_of_line", RuleFixed.ANY_UNTIL_EOL); |
113 | addFixedRule("anything_until_end_comment", RuleFixed.ANY_UNTIL_END); |
114 | addFixedRule("anything_except_two_dollar_signs", RuleFixed.ANY_EXCEPT_2_DOLLAR); |
115 | addFixedRule("anything", RuleFixed.ANY_WORD); |
116 | addFixedRule("@hex_start@", RuleFixed.HEX_START); |
117 | addFixedRule("@concat@", RuleFixed.CONCAT); |
118 | addFixedRule("@az_@", RuleFixed.AZ_UNDERSCORE); |
119 | addFixedRule("@af@", RuleFixed.AF); |
120 | addFixedRule("@digit@", RuleFixed.DIGIT); |
121 | addFixedRule("@open_bracket@", RuleFixed.OPEN_BRACKET); |
122 | addFixedRule("@close_bracket@", RuleFixed.CLOSE_BRACKET); |
123 | } |
124 | |
125 | /** |
126 | * Parse the syntax and let the rule call the visitor. |
127 | * |
128 | * @param visitor the visitor |
129 | * @param s the syntax to parse |
130 | */ |
131 | public void visit(BnfVisitor visitor, String s) { |
132 | this.syntax = s; |
133 | tokens = tokenize(); |
134 | index = 0; |
135 | Rule rule = parseRule(); |
136 | rule.setLinks(ruleMap); |
137 | rule.accept(visitor); |
138 | } |
139 | |
140 | /** |
141 | * Check whether the statement starts with a whitespace. |
142 | * |
143 | * @param s the statement |
144 | * @return if the statement is not empty and starts with a whitespace |
145 | */ |
146 | public static boolean startWithSpace(String s) { |
147 | return s.length() > 0 && Character.isWhitespace(s.charAt(0)); |
148 | } |
149 | |
150 | /** |
151 | * Convert convert ruleLink to rule_link. |
152 | * |
153 | * @param token the token |
154 | * @return the rule map key |
155 | */ |
156 | public static String getRuleMapKey(String token) { |
157 | StringBuilder buff = new StringBuilder(); |
158 | for (char ch : token.toCharArray()) { |
159 | if (Character.isUpperCase(ch)) { |
160 | buff.append('_').append(Character.toLowerCase(ch)); |
161 | } else { |
162 | buff.append(ch); |
163 | } |
164 | } |
165 | return buff.toString(); |
166 | } |
167 | |
168 | /** |
169 | * Get the rule head for the given title. |
170 | * |
171 | * @param title the title |
172 | * @return the rule head, or null |
173 | */ |
174 | public RuleHead getRuleHead(String title) { |
175 | return ruleMap.get(title); |
176 | } |
177 | |
178 | private Rule parseRule() { |
179 | read(); |
180 | return parseOr(); |
181 | } |
182 | |
183 | private Rule parseOr() { |
184 | Rule r = parseList(); |
185 | if (firstChar == '|') { |
186 | read(); |
187 | r = new RuleList(r, parseOr(), true); |
188 | } |
189 | lastRepeat = r; |
190 | return r; |
191 | } |
192 | |
193 | private Rule parseList() { |
194 | Rule r = parseToken(); |
195 | if (firstChar != '|' && firstChar != ']' && firstChar != '}' |
196 | && firstChar != 0) { |
197 | r = new RuleList(r, parseList(), false); |
198 | } |
199 | lastRepeat = r; |
200 | return r; |
201 | } |
202 | |
203 | private Rule parseToken() { |
204 | Rule r; |
205 | if ((firstChar >= 'A' && firstChar <= 'Z') |
206 | || (firstChar >= 'a' && firstChar <= 'z')) { |
207 | // r = new RuleElement(currentToken+ " syntax:" + syntax); |
208 | r = new RuleElement(currentToken, currentTopic); |
209 | } else if (firstChar == '[') { |
210 | read(); |
211 | Rule r2 = parseOr(); |
212 | r = new RuleOptional(r2); |
213 | if (firstChar != ']') { |
214 | throw new AssertionError("expected ], got " + currentToken |
215 | + " syntax:" + syntax); |
216 | } |
217 | } else if (firstChar == '{') { |
218 | read(); |
219 | r = parseOr(); |
220 | if (firstChar != '}') { |
221 | throw new AssertionError("expected }, got " + currentToken |
222 | + " syntax:" + syntax); |
223 | } |
224 | } else if ("@commaDots@".equals(currentToken)) { |
225 | r = new RuleList(new RuleElement(",", currentTopic), lastRepeat, false); |
226 | r = new RuleRepeat(r, true); |
227 | } else if ("@dots@".equals(currentToken)) { |
228 | r = new RuleRepeat(lastRepeat, false); |
229 | } else { |
230 | r = new RuleElement(currentToken, currentTopic); |
231 | } |
232 | lastRepeat = r; |
233 | read(); |
234 | return r; |
235 | } |
236 | |
237 | private void read() { |
238 | if (index < tokens.length) { |
239 | currentToken = tokens[index++]; |
240 | firstChar = currentToken.charAt(0); |
241 | } else { |
242 | currentToken = ""; |
243 | firstChar = 0; |
244 | } |
245 | } |
246 | |
247 | private String[] tokenize() { |
248 | ArrayList<String> list = New.arrayList(); |
249 | syntax = StringUtils.replaceAll(syntax, "yyyy-MM-dd", "@ymd@"); |
250 | syntax = StringUtils.replaceAll(syntax, "hh:mm:ss", "@hms@"); |
251 | syntax = StringUtils.replaceAll(syntax, "nnnnnnnnn", "@nanos@"); |
252 | syntax = StringUtils.replaceAll(syntax, "function", "@func@"); |
253 | syntax = StringUtils.replaceAll(syntax, "0x", "@hexStart@"); |
254 | syntax = StringUtils.replaceAll(syntax, ",...", "@commaDots@"); |
255 | syntax = StringUtils.replaceAll(syntax, "...", "@dots@"); |
256 | syntax = StringUtils.replaceAll(syntax, "||", "@concat@"); |
257 | syntax = StringUtils.replaceAll(syntax, "a-z|_", "@az_@"); |
258 | syntax = StringUtils.replaceAll(syntax, "A-Z|_", "@az_@"); |
259 | syntax = StringUtils.replaceAll(syntax, "A-F", "@af@"); |
260 | syntax = StringUtils.replaceAll(syntax, "0-9", "@digit@"); |
261 | syntax = StringUtils.replaceAll(syntax, "'['", "@openBracket@"); |
262 | syntax = StringUtils.replaceAll(syntax, "']'", "@closeBracket@"); |
263 | StringTokenizer tokenizer = getTokenizer(syntax); |
264 | while (tokenizer.hasMoreTokens()) { |
265 | String s = tokenizer.nextToken(); |
266 | // avoid duplicate strings |
267 | s = StringUtils.cache(s); |
268 | if (s.length() == 1) { |
269 | if (" \r\n".indexOf(s.charAt(0)) >= 0) { |
270 | continue; |
271 | } |
272 | } |
273 | list.add(s); |
274 | } |
275 | return list.toArray(new String[list.size()]); |
276 | } |
277 | |
278 | /** |
279 | * Get the list of tokens that can follow. |
280 | * This is the main autocomplete method. |
281 | * The returned map for the query 'S' may look like this: |
282 | * <pre> |
283 | * key: 1#SELECT, value: ELECT |
284 | * key: 1#SET, value: ET |
285 | * </pre> |
286 | * |
287 | * @param query the start of the statement |
288 | * @return the map of possible token types / tokens |
289 | */ |
290 | public HashMap<String, String> getNextTokenList(String query) { |
291 | Sentence sentence = new Sentence(); |
292 | sentence.setQuery(query); |
293 | try { |
294 | for (RuleHead head : statements) { |
295 | if (!head.getSection().startsWith("Commands")) { |
296 | continue; |
297 | } |
298 | sentence.start(); |
299 | if (head.getRule().autoComplete(sentence)) { |
300 | break; |
301 | } |
302 | } |
303 | } catch (IllegalStateException e) { |
304 | // ignore |
305 | } |
306 | return sentence.getNext(); |
307 | } |
308 | |
309 | /** |
310 | * Cross-link all statements with each other. |
311 | * This method is called after updating the topics. |
312 | */ |
313 | public void linkStatements() { |
314 | for (RuleHead r : ruleMap.values()) { |
315 | r.getRule().setLinks(ruleMap); |
316 | } |
317 | } |
318 | |
319 | /** |
320 | * Update a topic with a context specific rule. |
321 | * This is used for autocomplete support. |
322 | * |
323 | * @param topic the topic |
324 | * @param rule the database context rule |
325 | */ |
326 | public void updateTopic(String topic, DbContextRule rule) { |
327 | topic = StringUtils.toLowerEnglish(topic); |
328 | RuleHead head = ruleMap.get(topic); |
329 | if (head == null) { |
330 | head = new RuleHead("db", topic, rule); |
331 | ruleMap.put(topic, head); |
332 | statements.add(head); |
333 | } else { |
334 | head.setRule(rule); |
335 | } |
336 | } |
337 | |
338 | /** |
339 | * Get the list of possible statements. |
340 | * |
341 | * @return the list of statements |
342 | */ |
343 | public ArrayList<RuleHead> getStatements() { |
344 | return statements; |
345 | } |
346 | |
347 | /** |
348 | * Get the tokenizer for the given syntax. |
349 | * |
350 | * @param s the syntax |
351 | * @return the tokenizer |
352 | */ |
353 | public static StringTokenizer getTokenizer(String s) { |
354 | return new StringTokenizer(s, " [](){}|.,\r\n<>:-+*/=<\">!'$", true); |
355 | } |
356 | |
357 | } |