/*
 * mesopotamia-java @mesopotamia.version@
 * Multilingual parser and repository.
 * Copyright (C) 2005 Hammurapi Group
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * URL: http://http://www.hammurapi.biz
 * e-Mail: support@hammurapi.biz
 */
package org.mesopotamia.lang.java.v14;

import gnu.trove.TIntIntHashMap;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.mesopotamia.Loader;
import org.mesopotamia.LoaderBase;
import org.mesopotamia.LoaderEntry;
import org.mesopotamia.MesopotamiaException;
import org.mesopotamia.MesopotamiaToken;
import org.mesopotamia.RepositoryFactory;
import org.mesopotamia.RepositoryLanguage;
import org.mesopotamia.SourceUnitLoader;
import org.mesopotamia.SyntaxTree;
import org.mesopotamia.lang.java.JavaDocComment;
import org.mesopotamia.lang.java.MesopotamiaJavaNode;
import org.mesopotamia.lang.java.MultiLineComment;
import org.mesopotamia.lang.java.SingleLineComment;
import org.mesopotamia.sql.LoadLevel;
import org.mesopotamia.sql.LoadLevelImpl;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import antlr.ASTFactory;
import antlr.Token;
import antlr.TokenStream;
import antlr.TokenStreamException;
import antlr.collections.AST;
import biz.hammurapi.xml.dom.AbstractDomObject;

/**
 * Source unit loader that parses the previously loaded tokens of a source
 * unit into an AST, stores the tree as gzip-compressed XML in the load level
 * record, and rebuilds a {@link SyntaxTree} from that XML on demand.
 */
public class AstLoader extends LoaderBase implements SourceUnitLoader {
    /** Name under which the token loader is looked up in the repository language. */
    private static final String TOKEN_LOADER_NAME = "token";

    private static final String JAVA_DOC_START = "/**";
    private static final String ELEMENT_SYNTAX_TREE = "syntax-tree";
    private static final String ELEMENT_NODE = "node";
    private static final String ELEMENT_MULTI_LINE_COMMENT = "multi-line-comment";
    private static final String ELEMENT_SINGLE_LINE_COMMENT = "single-line-comment";
    private static final String ATTRIBUTE_TEXT = "text";
    private static final String ATTRIBUTE_LAST_TOKEN = "last-token";
    private static final String ATTRIBUTE_FIRST_TOKEN = "first-token";
    private static final String ATTRIBUTE_TOKEN = "token";
    private static final String ATTRIBUTE_SAME_TYPE_INDEX = "same-type-index";
    private static final String ATTRIBUTE_TYPE = "type";
    private static final String ATTRIBUTE_ID = "id";
    private static final String ATTRIBUTE_LEFT_COLUMN = "left-column";
    private static final String ATTRIBUTE_LINE = "line";
    private static final String ATTRIBUTE_COL = "col";
    private static final Logger logger = Logger.getLogger(AstLoader.class.getName());

    private final LoaderEntry xData;

    /**
     * @param repoLanguage repository language this loader belongs to
     * @param xData loader entry supplying the level id and level used by {@link #load}
     */
    public AstLoader(RepositoryLanguage repoLanguage, LoaderEntry xData) {
        super(repoLanguage, xData);
        this.xData = xData;
    }

    /**
     * Parses the tokens of the given source unit and records the resulting
     * tree (as gzipped XML) in a load level row. A load level row is inserted
     * whether parsing succeeds or fails; on failure it is flagged as failed
     * and carries the id of the stored error message.
     *
     * @param scanId scan the source unit belongs to
     * @param sourceUnitId source unit to load
     * @param environment not used by this loader
     * @return {@code true} if the source unit was processed without error
     *         (including the case where it has no non-whitespace tokens),
     *         {@code false} otherwise
     */
    public boolean load(int scanId, final int sourceUnitId, Object environment) {
        final RepositoryFactory factory = repoLanguage.getFactory();
        final String sourceUnitName = "Source unit #" + sourceUnitId;
        logger.fine("Loading source unit '" + sourceUnitName + "' to level " + xData.getLevel());

        try {
            LoadLevelImpl lli = new LoadLevelImpl(true);
            lli.setLevelId(xData.getId());
            lli.setSourceUnitId(sourceUnitId);
            try {
                Loader tokenLoader = repoLanguage.getLoader(TOKEN_LOADER_NAME);
                // The token loader returns Object; this code has always treated it as a token collection.
                @SuppressWarnings("unchecked")
                Collection<Token> tokens =
                        (Collection<Token>) tokenLoader.getData(sourceUnitId, Integer.valueOf(scanId));

                Collection<Token> significant = stripWhitespace(tokens);
                if (!significant.isEmpty()) {
                    significant.add(new Token(JavaTokenTypes.EOF));
                    lli.setLoaderData(serialize(parse(significant)));
                }
                return true;
            } catch (Exception e) {
                lli.setLoadFailed(true);
                lli.setMessageId(storeErrorMessage(scanId, sourceUnitId, e));
                logger.log(Level.WARNING,
                        "Cannot load source unit '" + sourceUnitName + "' to level " + xData.getLevel() + ": " + e, e);
                return false;
            } finally {
                factory.getEngine().insertLoadLevel(lli);
            }
        } catch (SQLException e) {
            factory.consume(this, e);
            return false;
        }
    }

    /**
     * Copies the tokens that are not whitespace {@link MesopotamiaToken}s into
     * a new mutable collection, in a single pass.
     *
     * @param tokens tokens to filter, may be {@code null}
     * @return filtered tokens; empty if {@code tokens} is {@code null}
     */
    private static Collection<Token> stripWhitespace(Collection<Token> tokens) {
        if (tokens == null) {
            return new ArrayList<Token>();
        }
        // One extra slot for the EOF token appended by the caller.
        Collection<Token> result = new ArrayList<Token>(tokens.size() + 1);
        for (Token token : tokens) {
            boolean whitespace = token instanceof MesopotamiaToken && ((MesopotamiaToken) token).isWhitespace();
            if (!whitespace) {
                result.add(token);
            }
        }
        return result;
    }

    /**
     * Runs the Java recognizer over the tokens and converts the resulting AST
     * (the compilation unit node and all of its siblings) into a DOM document.
     *
     * @param tokens non-whitespace tokens terminated by an EOF token
     * @return document with a {@code syntax-tree} root element
     */
    private Document parse(Collection<Token> tokens) throws Exception {
        final Iterator<Token> tit = tokens.iterator();
        JavaRecognizer parser = new JavaRecognizer(new TokenStream() {

            public Token nextToken() throws TokenStreamException {
                // Keep answering EOF once exhausted instead of handing the parser a null token.
                return tit.hasNext() ? tit.next() : new Token(JavaTokenTypes.EOF);
            }

        });

        ASTFactory astFactory = new ASTFactory();
        astFactory.setASTNodeClass(biz.hammurapi.antlr.AST.class);
        parser.setASTFactory(astFactory);

        parser.compilationUnit();
        AST compilationUnitAst = parser.getAST();

        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        Element rootElement = AbstractDomObject.addElement(doc, ELEMENT_SYNTAX_TREE);

        // Shared across the whole tree: running node id and ids of comments already attached.
        int[] counter = {0};
        Set<Integer> usedComments = new HashSet<Integer>();
        TIntIntHashMap sameTypeIndices = new TIntIntHashMap();

        for (AST node = compilationUnitAst; node != null; node = node.getNextSibling()) {
            int sameTypeIndex = sameTypeIndices.get(node.getType());
            sameTypeIndices.put(node.getType(), sameTypeIndex + 1);
            biz.hammurapi.antlr.AST root = (biz.hammurapi.antlr.AST) node;
            root.queryChildren();
            store(rootElement, root, sameTypeIndex, counter, usedComments);
        }
        return doc;
    }

    /**
     * Serializes the document to XML and gzips it.
     *
     * @param doc document to serialize
     * @return gzip-compressed XML bytes
     */
    private static byte[] serialize(Document doc) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        GZIPOutputStream gzos = new GZIPOutputStream(baos);
        try {
            TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(gzos));
        } finally {
            // Closing also finishes the gzip trailer, so it must happen before toByteArray().
            gzos.close();
        }
        return baos.toByteArray();
    }

    /**
     * Appends a {@code node} element describing {@code ast} to {@code owner}
     * and recurses into the children of {@code ast}.
     *
     * @param owner element to append the new node element to
     * @param ast AST node to store
     * @param sameTypeIndex index of this node among preceding siblings of the same type
     * @param counter single-element array holding the next node id; incremented here
     * @param usedComments ids of comment tokens already attached to some node
     */
    private void store(Element owner, biz.hammurapi.antlr.AST ast, int sameTypeIndex, int[] counter,
            Set<Integer> usedComments) throws SQLException {
        Element holder = owner.getOwnerDocument().createElement(ELEMENT_NODE);
        owner.appendChild(holder);

        holder.setAttribute(ATTRIBUTE_COL, String.valueOf(ast.getColumn()));
        holder.setAttribute(ATTRIBUTE_LINE, String.valueOf(ast.getLine()));
        holder.setAttribute(ATTRIBUTE_LEFT_COLUMN, String.valueOf(ast.getLeftColumn()));
        holder.setAttribute(ATTRIBUTE_ID, String.valueOf(counter[0]++));
        holder.setAttribute(ATTRIBUTE_TYPE, String.valueOf(repoLanguage.tokenType2id(ast.getType())));
        holder.setAttribute(ATTRIBUTE_SAME_TYPE_INDEX, String.valueOf(sameTypeIndex));

        if (ast.getToken() != null) {
            holder.setAttribute(ATTRIBUTE_TOKEN, String.valueOf(((MesopotamiaToken) ast.getToken()).getId()));
        }
        if (ast.getFirstToken() != null) {
            holder.setAttribute(ATTRIBUTE_FIRST_TOKEN,
                    String.valueOf(((MesopotamiaToken) ast.getFirstToken()).getId()));
        }
        if (ast.getLastToken() != null) {
            holder.setAttribute(ATTRIBUTE_LAST_TOKEN,
                    String.valueOf(((MesopotamiaToken) ast.getLastToken()).getId()));
        }
        if (ast.getText() != null) {
            holder.setAttribute(ATTRIBUTE_TEXT, ast.getText());
        }

        // OBJBLOCK nodes never receive comments.
        // NOTE(review): presumably so the comments go to the first member instead - confirm.
        if (ast.getType() != JavaTokenTypes.OBJBLOCK) {
            appendLeadingComments(holder, ast, usedComments);
        }

        TIntIntHashMap sameTypeIndices = new TIntIntHashMap();
        for (AST child = ast.getFirstChild(); child != null; child = child.getNextSibling()) {
            int childSameTypeIndex = sameTypeIndices.get(child.getType());
            sameTypeIndices.put(child.getType(), childSameTypeIndex + 1);
            store(holder, (biz.hammurapi.antlr.AST) child, childSameTypeIndex, counter, usedComments);
        }
    }

    /**
     * Walks backwards over the run of whitespace tokens immediately preceding
     * the first token of {@code ast} and appends every single-line or
     * multi-line comment in that run, in source order, to {@code holder}.
     * Comments whose id is already in {@code usedComments} are skipped.
     *
     * @param holder element receiving the comment elements
     * @param ast node whose leading comments are collected
     * @param usedComments ids of comment tokens already attached; updated here
     */
    private void appendLeadingComments(Element holder, biz.hammurapi.antlr.AST ast, Set<Integer> usedComments)
            throws SQLException {
        LinkedList<MesopotamiaToken> comments = new LinkedList<MesopotamiaToken>();
        MesopotamiaToken firstToken = (MesopotamiaToken) ast.getFirstToken();
        MesopotamiaToken wsToken = (MesopotamiaToken) (firstToken == null ? null : firstToken.getPrevToken());
        while (wsToken != null && wsToken.isWhitespace()) {
            int type = wsToken.getType();
            boolean isComment = type == JavaTokenTypes.SL_COMMENT || type == JavaTokenTypes.ML_COMMENT;
            if (isComment && usedComments.add(wsToken.getId())) {
                // Walking backwards, so prepend to restore source order.
                comments.addFirst(wsToken);
            }
            wsToken = (MesopotamiaToken) wsToken.getPrevToken();
        }

        for (MesopotamiaToken comment : comments) {
            String elementName = comment.getType() == JavaTokenTypes.SL_COMMENT
                    ? ELEMENT_SINGLE_LINE_COMMENT
                    : ELEMENT_MULTI_LINE_COMMENT;
            Element ce = AbstractDomObject.addTextElement(holder, elementName, comment.getText());
            ce.setAttribute(ATTRIBUTE_LINE, String.valueOf(comment.getLine()));
            ce.setAttribute(ATTRIBUTE_COL, String.valueOf(comment.getColumn()));
        }
    }

    /**
     * Rebuilds the {@link SyntaxTree} of a source unit from the gzipped XML
     * stored by {@link #load}.
     *
     * @param sourceUnitId source unit whose tree is requested
     * @param scanId not used by this loader
     * @return the {@link SyntaxTree}, or {@code null} if there is no load level
     *         record or the record carries no loader data
     * @throws MesopotamiaException if the stored data cannot be read or parsed
     */
    public Object getData(final int sourceUnitId, Integer scanId) throws MesopotamiaException {
        RepositoryFactory factory = repoLanguage.getFactory();
        try {
            // NOTE(review): 'data' is inherited (declaration not in this file); presumably the
            // same LoaderEntry that was passed to the constructor - confirm against LoaderBase.
            LoadLevel loadLevel = factory.getEngine().getSourceUnitLoadLevel(data.getId(), sourceUnitId);
            if (loadLevel == null) {
                return null;
            }

            byte[] loaderData = loadLevel.getLoaderData();
            if (loaderData == null) {
                return null;
            }

            javax.xml.parsers.SAXParserFactory saxParserFactory = javax.xml.parsers.SAXParserFactory.newInstance();
            javax.xml.parsers.SAXParser saxParser = saxParserFactory.newSAXParser();
            XMLReader parser = saxParser.getXMLReader();
            final SyntaxTree ret = new SyntaxTree();
            DefaultHandler handler = new DefaultHandler() {
                /** Node whose element is currently open; {@code null} at root level. */
                MesopotamiaJavaNode currentNode;
                /** Text of the comment element being read; {@code null} outside comment elements. */
                StringBuilder currentComment;
                int currentCommentLine;
                int currentCommentColumn;

                /**
                 * Closing a node element moves back to the parent node; closing a
                 * comment element attaches the collected comment to the current node.
                 */
                @Override
                public void endElement(String uri, String localName, String qName) throws SAXException {
                    if (ELEMENT_NODE.equals(qName)) {
                        if (currentNode != null) {
                            currentNode = (MesopotamiaJavaNode) currentNode.getParent();
                        }
                    } else if (ELEMENT_MULTI_LINE_COMMENT.equals(qName)) {
                        String text = currentComment.toString();
                        if (text.startsWith(JAVA_DOC_START)) {
                            currentNode.addComment(
                                    new JavaDocComment(currentCommentLine, currentCommentColumn, text));
                        } else {
                            currentNode.addComment(
                                    new MultiLineComment(currentCommentLine, currentCommentColumn, text));
                        }
                        currentComment = null;
                    } else if (ELEMENT_SINGLE_LINE_COMMENT.equals(qName)) {
                        currentNode.addComment(new SingleLineComment(
                                currentCommentLine, currentCommentColumn, currentComment.toString()));
                        currentComment = null;
                    }
                }

                @Override
                public void error(SAXParseException e) throws SAXException {
                    throw e;
                }

                @Override
                public void fatalError(SAXParseException e) throws SAXException {
                    throw e;
                }

                /**
                 * Opening a node element creates a node under the current one (or as a
                 * new root) and makes it current; opening a comment element starts
                 * collecting its text.
                 */
                @Override
                public void startElement(String uri, String localName, String qName, Attributes attributes)
                        throws SAXException {
                    if (ELEMENT_NODE.equals(qName)) {
                        int tokenType = Integer.parseInt(attributes.getValue(ATTRIBUTE_TYPE));
                        MesopotamiaJavaNode newNode =
                                new MesopotamiaJavaNode(repoLanguage.tokenTypeId2name(tokenType), currentNode);

                        newNode.setColumn(Integer.parseInt(attributes.getValue(ATTRIBUTE_COL)));
                        newNode.setLine(Integer.parseInt(attributes.getValue(ATTRIBUTE_LINE)));
                        newNode.setType(tokenType);
                        newNode.setSameTypeIndex(Integer.parseInt(attributes.getValue(ATTRIBUTE_SAME_TYPE_INDEX)));
                        newNode.setLeftColumn(Integer.parseInt(attributes.getValue(ATTRIBUTE_LEFT_COLUMN)));
                        newNode.setId(Integer.parseInt(attributes.getValue(ATTRIBUTE_ID)));
                        newNode.setText(attributes.getValue(ATTRIBUTE_TEXT));
                        newNode.setSourceUnitId(sourceUnitId);
                        ret.mapNode(newNode);

                        // Token references are optional attributes.
                        String ft = attributes.getValue(ATTRIBUTE_FIRST_TOKEN);
                        if (ft != null) {
                            newNode.setFirstToken(Integer.valueOf(ft));
                        }

                        String lt = attributes.getValue(ATTRIBUTE_LAST_TOKEN);
                        if (lt != null) {
                            newNode.setLastToken(Integer.valueOf(lt));
                        }

                        String t = attributes.getValue(ATTRIBUTE_TOKEN);
                        if (t != null) {
                            newNode.setToken(Integer.valueOf(t));
                        }

                        if (currentNode == null) {
                            newNode.setPosition(ret.getRoots().size());
                            ret.getRoots().add(newNode);
                        } else {
                            newNode.setPosition(currentNode.getChildren().size());
                            currentNode.getChildren().add(newNode);
                        }
                        currentNode = newNode;
                    } else if (ELEMENT_MULTI_LINE_COMMENT.equals(qName)
                            || ELEMENT_SINGLE_LINE_COMMENT.equals(qName)) {
                        currentComment = new StringBuilder();
                        currentCommentLine = Integer.parseInt(attributes.getValue(ATTRIBUTE_LINE));
                        currentCommentColumn = Integer.parseInt(attributes.getValue(ATTRIBUTE_COL));
                    }
                }

                @Override
                public void characters(char[] ch, int start, int length) throws SAXException {
                    if (currentComment == null) {
                        super.characters(ch, start, length);
                    } else {
                        // The parser may deliver one text node in several chunks.
                        currentComment.append(ch, start, length);
                    }
                }

            };
            parser.setContentHandler(handler);
            // Without this the error()/fatalError() overrides above are never invoked.
            parser.setErrorHandler(handler);

            GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(loaderData));
            try {
                parser.parse(new org.xml.sax.InputSource(in));
            } finally {
                in.close();
            }
            return ret;
        } catch (Exception e) {
            throw new MesopotamiaException("Cannot load AST: " + e, e);
        }
    }
}