001    package echopoint.util;
002    
003    import java.io.BufferedReader;
004    import java.io.IOException;
005    import java.io.StringReader;
006    import java.util.ArrayList;
007    import java.util.List;
008    
/**
 * The TokenizerKit class is useful for breaking a String down
 * into an array of "token" strings. It is more powerful than the
 * standard java.util.StringTokenizer because it understands quoted
 * strings and can optionally return the delimiters as tokens.
 * <p>
 * The actual scanning is delegated to java.io.StreamTokenizer, so
 * the usual backslash escapes inside quoted strings are processed as
 * described in that class' documentation.
 * <p>
 * NOTE(review): java.io.StreamTokenizer only keeps a character
 * classification table for a limited character range, so delimiter
 * characters above 0xFF are presumably not honoured - verify before
 * relying on them.
 */
public class TokenizerKit {

        /** Lowest character code treated as whitespace by the public methods. */
        private static final int WHITESPACE_LO = 0;

        /** Highest character code treated as whitespace (the space character). */
        private static final int WHITESPACE_HI = 32;

        /** Lowest character code treated as part of a word. */
        private static final int WORD_LO = 33;

        /** Highest character code explicitly registered as part of a word. */
        private static final int WORD_HI = 0xFF;

        /** Shared result for "nothing to tokenize"; zero length arrays are immutable. */
        private static final String[] NO_TOKENS = new String[0];

        /** not instantiable */
        private TokenizerKit() {
        }

        /**
         * Tokenizes a string into an array of Strings based on the delimiters while
         * respecting the quote characters ' and " as well as defining the
         * whitespace characters 0 to 32 as delimiters. The delimiters themselves
         * are not returned.
         *
         * @param str - the string to tokenize; null yields an empty array
         * @param delims - the string of allowable delimiter characters; null
         *        means "no extra delimiters"
         * @return a String[] of tokens, never null
         */
        public static String[] tokenize(String str, String delims) {
                return doTokenize(str, delims, WHITESPACE_LO, WHITESPACE_HI, WORD_LO, WORD_HI, true, false, false);
        }

        /**
         * Tokenizes a string into an array of Strings based on the delimiters while
         * respecting the quote characters ' and " but the common whitespace characters
         * space, tab, carriage return and newline are accepted as part of a token
         * instead of ending it, unless they are explicitly listed in
         * <code>delims</code>. The delimiters themselves are not returned.
         * <p>
         * NOTE(review): whitespace that precedes the first character of a token
         * still appears to be skipped by StreamTokenizer - confirm if exact
         * preservation of leading whitespace matters.
         *
         * @param str - the string to tokenize; null yields an empty array
         * @param delims - the string of allowable delimiter characters; null
         *        means "no extra delimiters"
         * @return a String[] of tokens, never null
         */
        public static String[] tokenizeStrict(String str, String delims) {
                return doTokenize(str, delims, WHITESPACE_LO, WHITESPACE_HI, WORD_LO, WORD_HI, true, true, false);
        }

        /**
         * Tokenizes a string into an array of Strings based on the delimiters while
         * respecting the quote characters ' and " as well as defining the
         * whitespace characters 0 to 32 as delimiters.
         *
         * @param str - the string to tokenize; null yields an empty array
         * @param delims - the string of allowable delimiter characters; null
         *        means "no extra delimiters"
         * @param returnDelimeters - if true then each delimiter found is returned
         *        as a one character token in the tokens array
         * @return a String[] of tokens, never null
         */
        public static String[] tokenize(String str, String delims, boolean returnDelimeters) {
                return doTokenize(str, delims, WHITESPACE_LO, WHITESPACE_HI, WORD_LO, WORD_HI, true, false, returnDelimeters);
        }

        /**
         * Tokenizes a string into an array of Strings based on the delimiters while
         * respecting the quote characters ' and " but the common whitespace characters
         * space, tab, carriage return and newline are accepted as part of a token
         * instead of ending it, unless they are explicitly listed in
         * <code>delims</code>.
         *
         * @param str - the string to tokenize; null yields an empty array
         * @param delims - the string of allowable delimiter characters; null
         *        means "no extra delimiters"
         * @param returnDelimeters - if true then each delimiter found is returned
         *        as a one character token in the tokens array
         * @return a String[] of tokens, never null
         */
        public static String[] tokenizeStrict(String str, String delims, boolean returnDelimeters) {
                return doTokenize(str, delims, WHITESPACE_LO, WHITESPACE_HI, WORD_LO, WORD_HI, true, true, returnDelimeters);
        }

        /**
         * This will split the given string into a series of lines (with no line
         * terminators in the lines themselves). Line boundaries are whatever
         * BufferedReader.readLine() recognises.
         *
         * @param str - the String to split into lines; null yields an empty array
         * @return a String[] of lines, never null
         */
        public static String[] splitIntoLines(String str) {
                if (str == null) {
                        // new StringReader(null) would throw a NullPointerException
                        return NO_TOKENS;
                }
                List<String> lines = new ArrayList<String>();
                BufferedReader reader = new BufferedReader(new StringReader(str));
                try {
                        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                                lines.add(line);
                        }
                } catch (IOException ignored) {
                        // Reading from an open in-memory StringReader does no real I/O,
                        // so this is not expected; keep whatever was read so far.
                }
                return lines.toArray(new String[lines.size()]);
        }

        /**
         * Does the actual tokenisation by configuring a java.io.StreamTokenizer
         * over the string and collecting what it returns.
         *
         * @param str - the string to tokenize; null yields an empty array
         * @param delims - characters registered as ordinary (single character)
         *        tokens; may be null
         * @param loWhiteSpace - first character code of the whitespace range
         * @param hiWhiteSpace - last character code of the whitespace range
         * @param loWordChar - first character code of the word character range
         * @param hiWordChar - last character code of the word character range
         * @param useQuotes - if true ' and " delimit quoted strings, which are
         *        returned without the surrounding quotes
         * @param noCommonWhiteSpace - if true space, tab, newline and carriage
         *        return are additionally registered as word characters
         * @param returnDelims - if true ordinary characters are returned as one
         *        character tokens, otherwise they are dropped
         * @return the tokens in the order they were found, never null
         */
        private static String[] doTokenize(String str, String delims, int loWhiteSpace, int hiWhiteSpace, int loWordChar, int hiWordChar, boolean useQuotes, boolean noCommonWhiteSpace, boolean returnDelims) {
                if (str == null) {
                        return NO_TOKENS;
                }

                java.io.StreamTokenizer st = new java.io.StreamTokenizer(new StringReader(str));

                // Start from an empty syntax table so that numbers and comments get
                // no special treatment, then build up exactly what was asked for.
                st.resetSyntax();
                st.whitespaceChars(loWhiteSpace, hiWhiteSpace);
                st.wordChars(loWordChar, hiWordChar);
                if (noCommonWhiteSpace) {
                        // Must happen BEFORE the delimiters are registered: otherwise a
                        // caller supplied delimiter such as "\n" would be turned back
                        // into a word character and silently ignored.
                        st.wordChars(' ', ' ');
                        st.wordChars('\t', '\t');
                        st.wordChars('\n', '\n');
                        st.wordChars('\r', '\r');
                }
                if (delims != null) {
                        for (int i = 0; i < delims.length(); i++) {
                                st.ordinaryChar(delims.charAt(i));
                        }
                }
                if (useQuotes) {
                        // Registered last, so a quote character always starts a quoted
                        // string even if it was also listed as a delimiter.
                        st.quoteChar('\'');
                        st.quoteChar('"');
                }

                List<String> tokenList = new ArrayList<String>();
                try {
                        for (int tt = st.nextToken(); tt != java.io.StreamTokenizer.TT_EOF; tt = st.nextToken()) {
                                switch (tt) {
                                        case java.io.StreamTokenizer.TT_WORD :
                                        case '"' :
                                        case '\'' :
                                                // A word or the contents of a quoted string; the value is in sval
                                                tokenList.add(st.sval);
                                                break;
                                        default :
                                                // An ordinary character, i.e. a delimiter; the value is the token type itself
                                                if (returnDelims) {
                                                        tokenList.add(String.valueOf((char) tt));
                                                }
                                                break;
                                }
                        }
                } catch (IOException ignored) {
                        // Reading from an open in-memory StringReader does no real I/O,
                        // so this is not expected; report "no tokens" as before.
                        return NO_TOKENS;
                }
                return tokenList.toArray(new String[tokenList.size()]);
        }
}