/*
 * mesopotamia-java @mesopotamia.version@
 * Multilingual parser and repository.
 * Copyright (C) 2005  Hammurapi Group
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * URL: http://www.hammurapi.biz
 * e-Mail: support@hammurapi.biz
 */
package org.mesopotamia.lang.java.v14;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Reader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.mesopotamia.LoaderBase;
import org.mesopotamia.LoaderEntry;
import org.mesopotamia.MesopotamiaException;
import org.mesopotamia.MesopotamiaToken;
import org.mesopotamia.RepositoryFactory;
import org.mesopotamia.RepositoryLanguage;
import org.mesopotamia.Source;
import org.mesopotamia.SourceLoader;
import org.mesopotamia.lang.java.CheatingFilterReader;
import org.mesopotamia.sql.LoadLevel;
import org.mesopotamia.sql.LoadLevelImpl;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import antlr.CharStreamException;
import antlr.Token;
import antlr.TokenStreamException;
import biz.hammurapi.convert.ConvertingService;
import biz.hammurapi.xml.dom.AbstractDomObject;


/**
 * Loader which runs the Java lexer over a source, stores the resulting
 * tokens as a gzip-compressed XML document in a load level record, and
 * reads that document back as a collection of {@link MesopotamiaToken}s.
 */
public class TokenLoader extends LoaderBase implements SourceLoader {
    private static final Logger logger = Logger.getLogger(TokenLoader.class.getName());

    public TokenLoader(RepositoryLanguage repoLanguage, LoaderEntry xData) {
        super(repoLanguage, xData);
    }

    /**
     * Tokenizes the source and stores the tokens in a load level record.
     * The record is inserted whether tokenizing succeeds or fails; on
     * failure it is marked as failed and carries the id of the stored
     * error message.
     *
     * @param scanId       scan id, used when storing an error message
     * @param sourceUnitId id of the source unit the load level belongs to
     * @param source       source whose content is converted to a {@link Reader}
     * @param environment  not used by this loader
     * @return <code>true</code> if the tokens were produced and stored,
     *         <code>false</code> if tokenizing failed or the load level
     *         could not be inserted
     */
    public boolean load(int scanId, int sourceUnitId, Source source, Object environment) {
        RepositoryFactory factory = repoLanguage.getFactory();
        try {
            LoadLevelImpl lli = new LoadLevelImpl(true);
            lli.setLevelId(data.getId());
            lli.setSourceUnitId(sourceUnitId);
            try {
                Object rSource = source.get();
                Reader r = (Reader) ConvertingService.convert(rSource, Reader.class);
                if (r == null) {
                    // NOTE(review): assumes convert() may yield null when no
                    // conversion is available. Failing here gives a meaningful
                    // stored message instead of a NullPointerException.
                    throw new IllegalStateException("Cannot convert source '" + source + "' to " + Reader.class.getName());
                }

                try {
                    // CheatingFilterReader is needed because of a bug in the Java
                    // grammar - the lexer gets stuck if the last token in the
                    // file is SL_COMMENT.
                    JavaLexer lexer = new JavaLexer(new CheatingFilterReader(r)) {
                        /**
                         * Replacement for generated mIDENT to recognize international characters.
                         */
                        @Override
                        public Token nextToken() throws TokenStreamException {
                            try {
                                if (Character.isJavaIdentifierStart(LA(1))) {
                                    resetText();
                                    int _begin = text.length();
                                    consume();

                                    while (Character.isJavaIdentifierPart(LA(1))) {
                                        consume();
                                    }

                                    // The literals table turns keywords into their own token types.
                                    Token ret = makeToken(testLiteralsTable(IDENT));
                                    ret.setText(new String(text.getBuffer(), _begin, text.length() - _begin));

                                    return ret;
                                }

                                return super.nextToken();
                            } catch (CharStreamException e) {
                                throw new TokenStreamException(e.toString());
                            }
                        }
                    };

                    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
                    Element rootElement = AbstractDomObject.addElement(doc, "tokens");

                    // One <token col=".." line=".." type="..">text</token> element per lexer token.
                    for (Token token = lexer.nextToken(); token.getType() != JavaTokenTypes.EOF; token = lexer.nextToken()) {
                        Element te = doc.createElement("token");
                        rootElement.appendChild(te);
                        te.setAttribute("col", String.valueOf(token.getColumn()));
                        te.setAttribute("line", String.valueOf(token.getLine()));
                        te.setAttribute("type", String.valueOf(repoLanguage.tokenType2id(token.getType())));
                        if (token.getText() != null) {
                            te.appendChild(doc.createTextNode(token.getText()));
                        }
                    }

                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    GZIPOutputStream gzos = new GZIPOutputStream(baos);
                    try {
                        TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(gzos));
                    } finally {
                        // Closing writes the gzip trailer and releases the deflater,
                        // also when the transformation fails.
                        gzos.close();
                    }
                    lli.setLoaderData(baos.toByteArray());

                    return true;
                } finally {
                    r.close();
                }
            } catch (Exception e) {
                lli.setLoadFailed(true);
                lli.setMessageId(storeErrorMessage(scanId, sourceUnitId, e));
                logger.log(Level.WARNING, "Cannot load source '"+source+"' to level "+data.getLevel()+": "+e, e);
                return false;
            } finally {
                // The load level is recorded for both successful and failed loads.
                factory.getEngine().insertLoadLevel(lli);
            }
        } catch (SQLException e) {
            factory.consume(this, e);
            return false;
        }
    }

    /**
     * Reads the gzip-compressed XML token document stored by
     * {@link #load(int, int, Source, Object)} and converts it to tokens.
     * Each token is linked to the token preceding it.
     *
     * @param sourceUnitId id of the source unit to read tokens for
     * @param scanId       not used by this loader
     * @return collection of {@link MesopotamiaToken} instances in document
     *         order, or <code>null</code> if there is no load level for the
     *         source unit
     * @throws MesopotamiaException if the stored data is missing or cannot
     *         be read or parsed
     */
    public Object getData(int sourceUnitId, Integer scanId) throws MesopotamiaException {
        RepositoryFactory factory = repoLanguage.getFactory();
        try {
            LoadLevel loadLevel = factory.getEngine().getSourceUnitLoadLevel(data.getId(), sourceUnitId);
            if (loadLevel == null) {
                return null;
            }

            byte[] loaderData = loadLevel.getLoaderData();
            if (loaderData == null) {
                // load() stores a load level without data when tokenizing fails.
                // Wrapped into MesopotamiaException by the catch clause below.
                throw new IllegalStateException("No token data stored for source unit " + sourceUnitId);
            }

            Document doc;
            GZIPInputStream gzis = new GZIPInputStream(new ByteArrayInputStream(loaderData));
            try {
                doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(gzis);
            } finally {
                // Releases the inflater regardless of whether the parser closed the stream.
                gzis.close();
            }

            Collection<MesopotamiaToken> ret = new ArrayList<MesopotamiaToken>();
            NodeList nl = doc.getDocumentElement().getChildNodes();
            MesopotamiaToken prevToken = null;
            for (int i = 0, l = nl.getLength(); i < l; ++i) {
                Node n = nl.item(i);
                if (n instanceof Element) {
                    Element el = (Element) n;
                    int typeId = Integer.parseInt(el.getAttribute("type"));
                    // NOTE(review): i is the child node index; it matches the token
                    // ordinal only while the root element has no non-element children.
                    MesopotamiaToken token = new MesopotamiaToken(i, repoLanguage.isWhitespace(typeId));
                    token.setType(repoLanguage.tokenTypeId2type(typeId));
                    token.setTypeName(repoLanguage.tokenTypeId2name(typeId));
                    token.setText(AbstractDomObject.getElementText(el));
                    token.setColumn(Integer.parseInt(el.getAttribute("col")));
                    token.setLine(Integer.parseInt(el.getAttribute("line")));
                    token.setFilename("Source unit "+sourceUnitId); // TODO - real name.
                    token.setPrevToken(prevToken);
                    ret.add(token);
                    prevToken = token;
                }
            }

            return ret;
        } catch (Exception e) {
            throw new MesopotamiaException("Cannot read tokens: "+e, e);
        }
    }
}