001 package echopoint.util;
002
003 import java.io.BufferedReader;
004 import java.io.IOException;
005 import java.io.StringReader;
006 import java.util.ArrayList;
007 import java.util.List;
008
/**
 * The TokenizerKit class is useful for breaking a String down
 * into an array of "token" strings. It is more powerful than the
 * standard java.util.StringTokenizer because it understands quoted
 * strings and can optionally return the delimiters it encounters.
 * <p>
 * All methods are static; the class cannot be instantiated.
 */
public class TokenizerKit {

    /** Lowest character code treated as whitespace. */
    private static final int LO_WHITESPACE = 0;

    /** Highest character code treated as whitespace (the space character). */
    private static final int HI_WHITESPACE = 32;

    /** Lowest character code treated as part of a word. */
    private static final int LO_WORD_CHAR = 33;

    /** Highest character code explicitly registered as part of a word. */
    private static final int HI_WORD_CHAR = 0xFF;

    /** Not instantiable. */
    private TokenizerKit() {
    }

    /**
     * Tokenizes a string into an array of Strings based on the delimiters while
     * respecting the quote characters ' and ". Characters 0 to 32 are treated as
     * whitespace and therefore also separate tokens. Delimiters are not returned.
     *
     * @param str - the string to tokenize; <code>null</code> yields an empty array
     * @param delims - the string of allowable delimiter characters;
     *        <code>null</code> means no delimiters beyond whitespace
     * @return a String[] of tokens, never <code>null</code>
     */
    public static String[] tokenize(String str, String delims) {
        return doTokenize(str, delims, LO_WHITESPACE, HI_WHITESPACE, LO_WORD_CHAR, HI_WORD_CHAR,
                true, false, false);
    }

    /**
     * Tokenizes a string into an array of Strings based on the delimiters while
     * respecting the quote characters ' and ". Unlike {@link #tokenize(String, String)},
     * space, tab, carriage return and newline are also accepted as word characters,
     * so they can appear inside a token instead of ending it. Any of these
     * characters that is listed in <code>delims</code> still acts as a delimiter.
     * Delimiters are not returned.
     *
     * @param str - the string to tokenize; <code>null</code> yields an empty array
     * @param delims - the string of allowable delimiter characters;
     *        <code>null</code> means no delimiters
     * @return a String[] of tokens, never <code>null</code>
     */
    public static String[] tokenizeStrict(String str, String delims) {
        return doTokenize(str, delims, LO_WHITESPACE, HI_WHITESPACE, LO_WORD_CHAR, HI_WORD_CHAR,
                true, true, false);
    }

    /**
     * Tokenizes a string into an array of Strings based on the delimiters while
     * respecting the quote characters ' and ". Characters 0 to 32 are treated as
     * whitespace and therefore also separate tokens.
     *
     * @param str - the string to tokenize; <code>null</code> yields an empty array
     * @param delims - the string of allowable delimiter characters;
     *        <code>null</code> means no delimiters beyond whitespace
     * @param returnDelimeters - if true then each delimiter from <code>delims</code>
     *        that is encountered is returned as its own one-character token;
     *        skipped whitespace is never returned
     * @return a String[] of tokens, never <code>null</code>
     */
    public static String[] tokenize(String str, String delims, boolean returnDelimeters) {
        return doTokenize(str, delims, LO_WHITESPACE, HI_WHITESPACE, LO_WORD_CHAR, HI_WORD_CHAR,
                true, false, returnDelimeters);
    }

    /**
     * Tokenizes a string into an array of Strings based on the delimiters while
     * respecting the quote characters ' and ". Unlike
     * {@link #tokenize(String, String, boolean)}, space, tab, carriage return and
     * newline are also accepted as word characters, so they can appear inside a
     * token instead of ending it. Any of these characters that is listed in
     * <code>delims</code> still acts as a delimiter.
     *
     * @param str - the string to tokenize; <code>null</code> yields an empty array
     * @param delims - the string of allowable delimiter characters;
     *        <code>null</code> means no delimiters
     * @param returnDelimeters - if true then each delimiter from <code>delims</code>
     *        that is encountered is returned as its own one-character token
     * @return a String[] of tokens, never <code>null</code>
     */
    public static String[] tokenizeStrict(String str, String delims, boolean returnDelimeters) {
        return doTokenize(str, delims, LO_WHITESPACE, HI_WHITESPACE, LO_WORD_CHAR, HI_WORD_CHAR,
                true, true, returnDelimeters);
    }

    /**
     * Splits the given string into a series of lines. The line terminators
     * themselves are not included in the returned lines. Lines are read with
     * {@link BufferedReader#readLine()}, so an empty string produces no lines
     * at all and a trailing line terminator does not produce an extra empty line.
     *
     * @param str - the String to split into lines; <code>null</code> yields an empty array
     * @return a String[] of lines, never <code>null</code>
     */
    public static String[] splitIntoLines(String str) {
        if (str == null) {
            // new StringReader(null) would throw a NullPointerException
            return new String[0];
        }
        List lines = new ArrayList();
        BufferedReader reader = new BufferedReader(new StringReader(str));
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                lines.add(line);
            }
        } catch (IOException ignored) {
            // Not expected: the reader is backed by an in-memory String and is never closed here.
        }
        return (String[]) lines.toArray(new String[lines.size()]);
    }

    /**
     * Does the actual tokenisation using a java.io.StreamTokenizer whose
     * syntax table is rebuilt from scratch according to the arguments.
     * <p>
     * The order in which the syntax table is configured matters, because each
     * later call overrides the classification set by an earlier one:
     * whitespace range, word range, optional "common whitespace as word
     * characters", caller delimiters, and finally the quote characters.
     * Consequently an explicit delimiter always wins over the whitespace/word
     * classification, and ' and " keep their quoting role even when they are
     * listed in <code>delims</code>.
     *
     * @param str - the string to tokenize; <code>null</code> yields an empty array
     * @param delims - characters to register as ordinary (single character)
     *        tokens; may be <code>null</code>
     * @param loWhiteSpace - first character code of the whitespace range
     * @param hiWhiteSpace - last character code of the whitespace range
     * @param loWordChar - first character code of the word character range
     * @param hiWordChar - last character code of the word character range
     * @param useQuotes - if true, ' and " delimit quoted strings whose contents
     *        are returned as a single token (parsed by StreamTokenizer, which
     *        also interprets backslash escapes inside the quotes)
     * @param noCommonWhiteSpace - if true, space, tab, newline and carriage
     *        return are additionally accepted as word characters
     * @param returnDelims - if true, ordinary characters are returned as
     *        one-character tokens; otherwise they are dropped
     * @return the tokens found, or an empty array if reading fails
     */
    private static String[] doTokenize(String str, String delims, int loWhiteSpace, int hiWhiteSpace,
            int loWordChar, int hiWordChar, boolean useQuotes, boolean noCommonWhiteSpace,
            boolean returnDelims) {
        if (str == null) {
            // new StringReader(null) would throw a NullPointerException
            return new String[0];
        }

        java.io.StreamTokenizer st = new java.io.StreamTokenizer(new StringReader(str));

        st.resetSyntax();
        st.whitespaceChars(loWhiteSpace, hiWhiteSpace);
        st.wordChars(loWordChar, hiWordChar);
        if (noCommonWhiteSpace) {
            // Must happen BEFORE the delimiters are registered; otherwise a
            // delimiter such as ' ' or '\n' would be turned back into a word
            // character and silently stop delimiting.
            st.wordChars(' ', ' ');
            st.wordChars('\t', '\t');
            st.wordChars('\n', '\n');
            st.wordChars('\r', '\r');
        }
        if (delims != null) {
            for (int i = 0; i < delims.length(); i++) {
                st.ordinaryChar(delims.charAt(i));
            }
        }
        if (useQuotes) {
            st.quoteChar('\'');
            st.quoteChar('"');
        }

        List tokenList = new ArrayList();
        try {
            for (int tt = st.nextToken(); tt != java.io.StreamTokenizer.TT_EOF; tt = st.nextToken()) {
                switch (tt) {
                    case java.io.StreamTokenizer.TT_WORD :
                    case '"' :
                    case '\'' :
                        // A word or the contents of a quoted string; the value is in sval
                        tokenList.add(st.sval);
                        break;
                    default :
                        // An ordinary character (a delimiter); the token type is the character itself
                        if (returnDelims) {
                            tokenList.add(String.valueOf((char) tt));
                        }
                        break;
                }
            }
        } catch (IOException ioe) {
            // Not expected for an in-memory reader; preserve the historical
            // contract of returning no tokens at all on failure.
            return new String[0];
        }
        return (String[]) tokenList.toArray(new String[tokenList.size()]);
    }
}