001    /*
002      * mesopotamia-java @mesopotamia.version@
003     * Multilingual parser and repository. 
004     * Copyright (C) 2005  Hammurapi Group
005     *
006     * This program is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 2 of the License, or (at your option) any later version.
010     *
011     * This program is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with this library; if not, write to the Free Software
018     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
019     *
020     * URL: http://www.hammurapi.biz
021     * e-Mail: support@hammurapi.biz
022     */
023    package org.mesopotamia.lang.java.v5;
024    
025    import java.io.ByteArrayInputStream;
026    import java.io.ByteArrayOutputStream;
027    import java.io.Reader;
028    import java.sql.SQLException;
029    import java.util.ArrayList;
030    import java.util.Collection;
031    import java.util.logging.Level;
032    import java.util.logging.Logger;
033    import java.util.zip.GZIPInputStream;
034    import java.util.zip.GZIPOutputStream;
035    
036    import javax.xml.parsers.DocumentBuilderFactory;
037    import javax.xml.transform.TransformerFactory;
038    import javax.xml.transform.dom.DOMSource;
039    import javax.xml.transform.stream.StreamResult;
040    
041    import org.mesopotamia.LoaderBase;
042    import org.mesopotamia.LoaderEntry;
043    import org.mesopotamia.MesopotamiaException;
044    import org.mesopotamia.MesopotamiaToken;
045    import org.mesopotamia.RepositoryFactory;
046    import org.mesopotamia.RepositoryLanguage;
047    import org.mesopotamia.Source;
048    import org.mesopotamia.SourceLoader;
049    import org.mesopotamia.lang.java.CheatingFilterReader;
050    import org.mesopotamia.sql.LoadLevel;
051    import org.mesopotamia.sql.LoadLevelImpl;
052    import org.w3c.dom.Document;
053    import org.w3c.dom.Element;
054    import org.w3c.dom.Node;
055    import org.w3c.dom.NodeList;
056    
057    import antlr.CharStreamException;
058    import antlr.Token;
059    import antlr.TokenStreamException;
060    import biz.hammurapi.convert.ConvertingService;
061    import biz.hammurapi.xml.dom.AbstractDomObject;
062    
063    
064    public class TokenLoader extends LoaderBase implements SourceLoader {
065            private static final Logger logger = Logger.getLogger(TokenLoader.class.getName());
066    
067            public TokenLoader(RepositoryLanguage repoLanguage, LoaderEntry xData) {
068                    super(repoLanguage, xData);
069            }
070    
071            public boolean load(int scanId, int sourceUnitId, Source source, Object environment) {
072                    RepositoryFactory factory=repoLanguage.getFactory();
073                    try {
074                            LoadLevelImpl lli = new LoadLevelImpl(true);
075                            lli.setLevelId(data.getId());
076                            lli.setSourceUnitId(sourceUnitId);
077                            try {
078                                    Object rSource = source.get();
079                                    Reader r = (Reader) ConvertingService.convert(rSource, Reader.class);
080                                    try {
081                                            // CheatingFilterReader is needed because of a bug in Java
082                                            // gramma - it stucks if last token in the file
083                                            // is SL_COMMENT
084    
085                                            JavaLexer lexer = new JavaLexer(new CheatingFilterReader(r)) {
086                                                    /**
087                                                     * Replacement for generated mIDENT to recognize international characters.
088                                                     */
089                                                    public Token nextToken() throws TokenStreamException {
090                                                    try {
091                                                            if (Character.isJavaIdentifierStart(LA(1))) {
092                                                                resetText();
093                                                                int _begin = text.length();
094                                                                    consume();
095                                                                    
096                                                                    while (Character.isJavaIdentifierPart(LA(1))) {
097                                                                        consume();
098                                                                    }
099                                                            
100                                                                Token ret = makeToken(testLiteralsTable(IDENT));
101                                                                ret.setText(new String(text.getBuffer(), _begin, text.length() - _begin));
102                                                                
103                                                                            // check if "assert" keyword is enabled
104                                                                            if (isAssertEnabled() && "assert".equals(ret.getText())) {
105                                                                                    ret.setType(LITERAL_assert); // set token type for the rule in the parser
106                                                                            }
107                                                                            // check if "enum" keyword is enabled
108                                                                            if (isEnumEnabled() && "enum".equals(ret.getText())) {
109                                                                                    ret.setType(LITERAL_enum); // set token type for the rule in the parser
110                                                                            }
111                                                                
112                                                                return ret;
113                                                                    }
114                                                            
115                                                                    return super.nextToken();
116                                                    } catch (CharStreamException e) {
117                                                        throw new TokenStreamException(e.toString());
118                                                    }
119                                                    }
120                                            };
121                                            
122                                        Document doc=DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
123                                        Element rootElement = AbstractDomObject.addElement(doc, "tokens");
124                                            
125                                            Token token;
126                                            for (int i = 0; (token = lexer.nextToken()).getType() != JavaTokenTypes.EOF; i++) {
127                                                    Element te = doc.createElement("token");
128                                                    rootElement.appendChild(te);
129                                                    te.setAttribute("col", String.valueOf(token.getColumn()));
130                                                    te.setAttribute("line", String.valueOf(token.getLine()));
131                                                    te.setAttribute("type", String.valueOf(repoLanguage.tokenType2id(token.getType())));
132                                                    if (token.getText()!=null) {
133                                                            te.appendChild(doc.createTextNode(token.getText()));
134                                                    }
135                                            }
136    
137                                            ByteArrayOutputStream baos = new ByteArrayOutputStream();
138                                            GZIPOutputStream gzos = new GZIPOutputStream(baos);
139                                    TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(gzos));
140                                            gzos.close();
141                                            lli.setLoaderData(baos.toByteArray());
142                                            
143                                            return true;
144                                    } finally {
145                                            r.close();
146                                    }
147                            } catch (Exception e) {
148                                    lli.setLoadFailed(true);
149                                    lli.setMessageId(storeErrorMessage(scanId, sourceUnitId, e));
150                                    logger.log(Level.WARNING, "Cannot load source '"+source+"' to level "+data.getLevel()+": "+e, e);
151                                    return false;
152                            } finally {
153                                    factory.getEngine().insertLoadLevel(lli);
154                            }
155                    } catch (SQLException e) {
156                            factory.consume(this, e);
157                            return false;
158                    }
159            }
160            
161            /**
162             * Reads tokens from BLOB and returns collection of biz.hammurapi.antlr.Token instances
163             */
164            public Object getData(int sourceUnitId, Integer scanId) throws MesopotamiaException {
165                    RepositoryFactory factory=repoLanguage.getFactory();
166                    try {
167                            LoadLevel loadLevel = factory.getEngine().getSourceUnitLoadLevel(data.getId(), sourceUnitId);
168                            if (loadLevel==null) {
169                                    return null;
170                            }
171                            Collection<MesopotamiaToken> ret = new ArrayList<MesopotamiaToken>();                                               
172                        Document doc=DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new GZIPInputStream(new ByteArrayInputStream(loadLevel.getLoaderData())));
173                        NodeList nl = doc.getDocumentElement().getChildNodes();
174                        MesopotamiaToken prevToken = null;
175                            for (int i = 0, l = nl.getLength(); i<l; ++i) {
176                                    Node n = nl.item(i);
177                                    if (n instanceof Element) {
178                                            Element el = (Element) n;
179                                            int typeId = Integer.parseInt(el.getAttribute("type"));
180                                            MesopotamiaToken token = new MesopotamiaToken(i, repoLanguage.isWhitespace(typeId));  
181                                            token.setType(repoLanguage.tokenTypeId2type(typeId));
182                                            token.setTypeName(repoLanguage.tokenTypeId2name(typeId));
183                                            token.setText(AbstractDomObject.getElementText(el));
184                                            token.setColumn(Integer.parseInt(el.getAttribute("col")));
185                                            token.setLine(Integer.parseInt(el.getAttribute("line")));
186                                            token.setFilename("Source unit "+sourceUnitId); // TODO - real name.
187                                            token.setPrevToken(prevToken);
188                                            ret.add(token);
189    //                                      System.out.println("[TOKEN] "+token);
190                                            prevToken = token;                                      
191                                    }
192                            }
193    
194                            return ret;
195                    } catch (Exception e) {
196                            throw new MesopotamiaException("Cannot read tokens: "+e, e);
197                    }
198            }
199    }