001 /* 002 * mesopotamia-java @mesopotamia.version@ 003 * Multilingual parser and repository. 004 * Copyright (C) 2005 Hammurapi Group 005 * 006 * This program is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 2 of the License, or (at your option) any later version. 010 * 011 * This program is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with this library; if not, write to the Free Software 018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 019 * 020 * URL: http://http://www.hammurapi.biz 021 * e-Mail: support@hammurapi.biz 022 */ 023 package org.mesopotamia.lang.java.v5; 024 025 import gnu.trove.TIntIntHashMap; 026 027 import java.io.ByteArrayInputStream; 028 import java.io.ByteArrayOutputStream; 029 import java.sql.SQLException; 030 import java.util.ArrayList; 031 import java.util.Collection; 032 import java.util.HashSet; 033 import java.util.Iterator; 034 import java.util.LinkedList; 035 import java.util.Set; 036 import java.util.logging.Level; 037 import java.util.logging.Logger; 038 import java.util.zip.GZIPInputStream; 039 import java.util.zip.GZIPOutputStream; 040 041 import javax.xml.parsers.DocumentBuilderFactory; 042 import javax.xml.transform.TransformerFactory; 043 import javax.xml.transform.dom.DOMSource; 044 import javax.xml.transform.stream.StreamResult; 045 046 import org.mesopotamia.Loader; 047 import org.mesopotamia.LoaderBase; 048 import org.mesopotamia.LoaderEntry; 049 import org.mesopotamia.MesopotamiaException; 050 import org.mesopotamia.MesopotamiaToken; 051 import org.mesopotamia.RepositoryFactory; 052 import org.mesopotamia.RepositoryLanguage; 053 import org.mesopotamia.SourceUnitLoader; 054 import org.mesopotamia.SyntaxTree; 055 import org.mesopotamia.lang.java.JavaDocComment; 056 import org.mesopotamia.lang.java.MesopotamiaJavaNode; 057 import org.mesopotamia.lang.java.MultiLineComment; 058 import org.mesopotamia.lang.java.SingleLineComment; 059 import org.mesopotamia.sql.LoadLevel; 060 import org.mesopotamia.sql.LoadLevelImpl; 061 import org.w3c.dom.Document; 062 import org.w3c.dom.Element; 063 import org.xml.sax.Attributes; 064 import org.xml.sax.ContentHandler; 065 import org.xml.sax.SAXException; 066 import org.xml.sax.SAXParseException; 067 import org.xml.sax.XMLReader; 068 import org.xml.sax.helpers.DefaultHandler; 069 070 import antlr.ASTFactory; 071 import antlr.Token; 072 import antlr.TokenStream; 073 import antlr.TokenStreamException; 074 import antlr.collections.AST; 075 import biz.hammurapi.antlr.AstVisualizable; 076 import biz.hammurapi.swing.Browser; 077 import biz.hammurapi.xml.dom.AbstractDomObject; 078 079 public class AstLoader extends LoaderBase implements SourceUnitLoader { 080 private static final String JAVA_DOC_START = "/**"; 081 private static final String ELEMENT_NODE = "node"; 082 private static final String ELEMENT_MULTI_LINE_COMMENT = "multi-line-comment"; 083 private static final String ELEMENT_SINGLE_LINE_COMMENT = "single-line-comment"; 084 private static final String ATTRIBUTE_TEXT = "text"; 085 private static final String ATTRIBUTE_LAST_TOKEN = "last-token"; 086 private static final String ATTRIBUTE_FIRST_TOKEN = "first-token"; 087 private static final String ATTRIBUTE_TOKEN = "token"; 088 private static final String ATTRIBUTE_SAME_TYPE_INDEX = "same-type-index"; 089 private static final String ATTRIBUTE_TYPE = "type"; 090 private static final String ATTRIBUTE_ID = "id"; 091 private static final String ATTRIBUTE_LEFT_COLUMN = "left-column"; 092 private static final String ATTRIBUTE_LINE = "line"; 093 private static final String ATTRIBUTE_COL = "col"; 094 private static final Logger logger = Logger.getLogger(AstLoader.class.getName()); 095 private LoaderEntry xData; 096 097 public AstLoader(RepositoryLanguage repoLanguage, LoaderEntry xData) { 098 super(repoLanguage, xData); 099 this.xData = xData; 100 } 101 102 public boolean load(int scanId, final int sourceUnitId, Object environment) { 103 final RepositoryFactory factory = repoLanguage.getFactory(); 104 //final SourceUnit si = (SourceUnit) sit.next(); 105 final String sourceUnitName = "Source unit #"+sourceUnitId; 106 logger.fine("Loading source unit '"+sourceUnitName+"' to level "+xData.getLevel()); 107 108 try { 109 LoadLevelImpl lli = new LoadLevelImpl(true); 110 lli.setLevelId(xData.getId()); 111 lli.setSourceUnitId(sourceUnitId); 112 try { 113 Loader tokenLoader = repoLanguage.getLoader(ATTRIBUTE_TOKEN); 114 Collection<Token> tokens = (Collection<Token>) tokenLoader.getData(sourceUnitId, new Integer(scanId)); 115 Collection<Token> nonWhiteSpace = null; 116 if (tokens!=null) { 117 nonWhiteSpace = new ArrayList<Token>(tokens); 118 Iterator<Token> it = nonWhiteSpace.iterator(); 119 while (it.hasNext()) { 120 Object t = it.next(); 121 if (t instanceof MesopotamiaToken && ((MesopotamiaToken) t).isWhitespace()) { 122 it.remove(); 123 } 124 } 125 } 126 127 if (nonWhiteSpace!=null && !nonWhiteSpace.isEmpty()) { 128 nonWhiteSpace.add(new Token(JavaTokenTypes.EOF)); 129 final Iterator<Token> tit = nonWhiteSpace.iterator(); 130 JavaRecognizer parser = new JavaRecognizer(new TokenStream() { 131 132 public Token nextToken() throws TokenStreamException { 133 return (Token) (tit.hasNext() ? tit.next() : null); 134 } 135 136 }); 137 138 ASTFactory astFactory = new ASTFactory(); 139 astFactory.setASTNodeClass(biz.hammurapi.antlr.AST.class); 140 parser.setASTFactory(astFactory); 141 142 parser.compilationUnit(); 143 TIntIntHashMap sameTypeIndices = new TIntIntHashMap(); 144 AST compilationUnitAst = parser.getAST(); 145 // Browser.show(new AstVisualizable(null, compilationUnitAst, parser.getTokenNames(), true), "CU"); 146 // ((biz.hammurapi.antlr.AST) compilationUnitAst).showWithSiblings("Compilation unit", parser.getTokenNames()); 147 148 Document doc=DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); 149 Element rootElement = AbstractDomObject.addElement(doc, "syntax-tree"); 150 int[] counter = {0}; 151 152 Set<Integer> usedComments = new HashSet<Integer>(); 153 for (AST node = compilationUnitAst; node != null; node = node.getNextSibling()) { 154 int sameTypeIndex = sameTypeIndices.get(node.getType()); 155 sameTypeIndices.put(node.getType(), sameTypeIndex + 1); 156 ((biz.hammurapi.antlr.AST) node).queryChildren(); 157 store(rootElement, (biz.hammurapi.antlr.AST) node, sameTypeIndex, counter, usedComments); 158 } 159 160 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 161 GZIPOutputStream gzos = new GZIPOutputStream(baos); 162 TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(gzos)); 163 // TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(System.out)); 164 gzos.close(); 165 lli.setLoaderData(baos.toByteArray()); 166 } 167 return true; 168 } catch (Exception e) { 169 lli.setLoadFailed(true); 170 lli.setMessageId(storeErrorMessage(scanId, sourceUnitId, e)); 171 logger.log(Level.WARNING, "Cannot load source unit '"+sourceUnitName+"' to level "+xData.getLevel()+": "+e, e); 172 return false; 173 } finally { 174 factory.getEngine().insertLoadLevel(lli); 175 } 176 } catch (SQLException e) { 177 factory.consume(this, e); 178 return false; 179 } 180 } 181 182 private void store(Element owner, biz.hammurapi.antlr.AST ast, int sameTypeIndex, int[] counter, Set<Integer> usedComments) throws SQLException { 183 Element holder = owner.getOwnerDocument().createElement(ELEMENT_NODE); 184 owner.appendChild(holder); 185 186 holder.setAttribute(ATTRIBUTE_COL, String.valueOf(ast.getColumn())); 187 holder.setAttribute(ATTRIBUTE_LINE, String.valueOf(ast.getLine())); 188 holder.setAttribute(ATTRIBUTE_LEFT_COLUMN, String.valueOf(ast.getLeftColumn())); 189 holder.setAttribute(ATTRIBUTE_ID, String.valueOf(counter[0]++)); 190 holder.setAttribute(ATTRIBUTE_TYPE, String.valueOf(repoLanguage.tokenType2id(ast.getType()))); 191 holder.setAttribute(ATTRIBUTE_SAME_TYPE_INDEX, String.valueOf(sameTypeIndex)); 192 193 if (ast.getToken()!=null) { 194 holder.setAttribute(ATTRIBUTE_TOKEN, String.valueOf(((MesopotamiaToken) ast.getToken()).getId())); 195 } 196 if (ast.getFirstToken()!=null) { 197 holder.setAttribute(ATTRIBUTE_FIRST_TOKEN, String.valueOf(((MesopotamiaToken) ast.getFirstToken()).getId())); 198 } 199 if (ast.getLastToken()!=null) { 200 holder.setAttribute(ATTRIBUTE_LAST_TOKEN, String.valueOf(((MesopotamiaToken) ast.getLastToken()).getId())); 201 } 202 if (ast.getText()!=null) { 203 holder.setAttribute(ATTRIBUTE_TEXT, ast.getText()); 204 } 205 206 if (ast.getType()!=JavaTokenTypes.OBJBLOCK) { 207 LinkedList<MesopotamiaToken> comments = new LinkedList<MesopotamiaToken>(); 208 MesopotamiaToken firstToken = (MesopotamiaToken) ast.getFirstToken(); 209 MesopotamiaToken wsToken = (MesopotamiaToken) (firstToken==null ? null : firstToken.getPrevToken()); 210 while (wsToken!=null && wsToken.isWhitespace()) { 211 if (wsToken.getType()==JavaTokenTypes.SL_COMMENT || wsToken.getType()==JavaTokenTypes.ML_COMMENT) { 212 if (usedComments.add(wsToken.getId())) { 213 comments.addFirst(wsToken); 214 } 215 } 216 wsToken = (MesopotamiaToken) wsToken.getPrevToken(); 217 } 218 219 for (MesopotamiaToken comment: comments) { 220 Element ce; 221 if (comment.getType()==JavaTokenTypes.SL_COMMENT) { 222 ce = AbstractDomObject.addTextElement(holder, ELEMENT_SINGLE_LINE_COMMENT, comment.getText()); 223 } else { 224 ce = AbstractDomObject.addTextElement(holder, ELEMENT_MULTI_LINE_COMMENT, comment.getText()); 225 } 226 ce.setAttribute(ATTRIBUTE_LINE, String.valueOf(comment.getLine())); 227 ce.setAttribute(ATTRIBUTE_COL, String.valueOf(comment.getColumn())); 228 } 229 } 230 231 TIntIntHashMap sameTypeIndices = new TIntIntHashMap(); 232 int childPosition = 0; 233 for (AST child = ast.getFirstChild(); child != null; child = child.getNextSibling(), ++childPosition) { 234 int childSameTypeIndex = sameTypeIndices.get(child.getType()); 235 sameTypeIndices.put(child.getType(), childSameTypeIndex + 1); 236 store(holder, (biz.hammurapi.antlr.AST) child, childSameTypeIndex, counter, usedComments); 237 } 238 } 239 240 /** 241 * Returns SyntaxTree 242 */ 243 public Object getData(final int sourceUnitId, Integer scanId) throws MesopotamiaException { 244 RepositoryFactory factory=repoLanguage.getFactory(); 245 try { 246 LoadLevel loadLevel = factory.getEngine().getSourceUnitLoadLevel(data.getId(), sourceUnitId); 247 if (loadLevel==null) { 248 return null; 249 } 250 251 byte[] loaderData = loadLevel.getLoaderData(); 252 if (loaderData==null) { 253 return null; 254 } 255 256 javax.xml.parsers.SAXParserFactory saxParserFactory = javax.xml.parsers.SAXParserFactory.newInstance(); 257 javax.xml.parsers.SAXParser saxParser = saxParserFactory.newSAXParser(); 258 XMLReader parser = saxParser.getXMLReader(); 259 final SyntaxTree ret = new SyntaxTree(); 260 ContentHandler ch = new DefaultHandler() { 261 MesopotamiaJavaNode currentNode; 262 StringBuilder currentComment; 263 int currentCommentLine; 264 int currentCommentColumn; 265 266 /** 267 * Writes text to node, nullifies currentText. 268 */ 269 public void endElement(String uri, String localName, String qName) throws SAXException { 270 if (ELEMENT_NODE.equals(qName)) { 271 if (currentNode!=null) { 272 currentNode=(MesopotamiaJavaNode) currentNode.getParent(); 273 } 274 } else if (ELEMENT_MULTI_LINE_COMMENT.equals(qName)) { 275 String text = currentComment.toString(); 276 if (text!=null && text.startsWith(JAVA_DOC_START)) { 277 currentNode.addComment(new JavaDocComment(currentCommentLine, currentCommentColumn, text)); 278 } else { 279 currentNode.addComment(new MultiLineComment(currentCommentLine, currentCommentColumn, text)); 280 } 281 currentComment = null; 282 } else if (ELEMENT_SINGLE_LINE_COMMENT.equals(qName)) { 283 currentNode.addComment(new SingleLineComment(currentCommentLine, currentCommentColumn, currentComment.toString())); 284 currentComment = null; 285 } 286 } 287 288 public void error(SAXParseException e) throws SAXException { 289 throw e; 290 } 291 292 public void fatalError(SAXParseException e) throws SAXException { 293 throw e; 294 } 295 296 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { 297 if (ELEMENT_NODE.equals(qName)) { 298 int tokenType = Integer.parseInt(attributes.getValue(ATTRIBUTE_TYPE)); 299 MesopotamiaJavaNode newNode = new MesopotamiaJavaNode(repoLanguage.tokenTypeId2name(tokenType), currentNode); 300 301 newNode.setColumn(Integer.parseInt(attributes.getValue(ATTRIBUTE_COL))); 302 newNode.setLine(Integer.parseInt(attributes.getValue(ATTRIBUTE_LINE))); 303 newNode.setType(tokenType); 304 newNode.setSameTypeIndex(Integer.parseInt(attributes.getValue(ATTRIBUTE_SAME_TYPE_INDEX))); 305 newNode.setLeftColumn(Integer.parseInt(attributes.getValue(ATTRIBUTE_LEFT_COLUMN))); 306 newNode.setId(Integer.parseInt(attributes.getValue(ATTRIBUTE_ID))); 307 newNode.setText(attributes.getValue(ATTRIBUTE_TEXT)); 308 newNode.setSourceUnitId(sourceUnitId); 309 ret.mapNode(newNode); 310 311 312 String ft = attributes.getValue(ATTRIBUTE_FIRST_TOKEN); 313 if (ft!=null) { 314 newNode.setFirstToken(new Integer(ft)); 315 } 316 317 String lt = attributes.getValue(ATTRIBUTE_LAST_TOKEN); 318 if (lt!=null) { 319 newNode.setLastToken(new Integer(lt)); 320 } 321 322 String t = attributes.getValue(ATTRIBUTE_TOKEN); 323 if (t!=null) { 324 newNode.setToken(new Integer(t)); 325 } 326 327 if (currentNode==null) { 328 newNode.setPosition(ret.getRoots().size()); 329 ret.getRoots().add(newNode); 330 } else { 331 newNode.setPosition(currentNode.getChildren().size()); 332 currentNode.getChildren().add(newNode); 333 } 334 currentNode=newNode; 335 } else if (ELEMENT_MULTI_LINE_COMMENT.equals(qName) || ELEMENT_SINGLE_LINE_COMMENT.equals(qName)) { 336 currentComment = new StringBuilder(); 337 currentCommentLine = Integer.parseInt(attributes.getValue(ATTRIBUTE_LINE)); 338 currentCommentColumn = Integer.parseInt(attributes.getValue(ATTRIBUTE_COL)); 339 } 340 } 341 342 @Override 343 public void characters(char[] ch, int start, int length) throws SAXException { 344 if (currentComment==null) { 345 super.characters(ch, start, length); 346 } else { 347 currentComment.append(ch, start, length); 348 } 349 } 350 351 }; 352 parser.setContentHandler(ch); 353 parser.parse( new org.xml.sax.InputSource(new GZIPInputStream(new ByteArrayInputStream(loaderData)))); 354 return ret; 355 } catch (Exception e) { 356 throw new MesopotamiaException("Cannot load AST: "+e, e); 357 } 358 } 359 }