/*
 * This program is designed to fake parse an HTML file,
 * looking for headers, and generating a table of contents
 * based on it.
 * It will ignore all other markup.
 *
 * version 1.1
 */

import java.io.*;

public class jhtmlindex {

    /* Markup emitted for the generated table of contents. */
    private static final String OPEN_LIST = "<OL>";
    private static final String CLOSE_LIST = "</OL>";
    private static final String OPEN_ITEM = "<LI>";
    private static final String CLOSE_ITEM = "</LI>";

    /* Deepest heading level HTML defines (H1..H6). */
    private static final int MAX_DEPTH = 6;

    /* state values are here */

    /* inheader:     true while we are collecting the text of a heading.
     * waitforclose: true while we are skipping to the '>' that ends the
     *               tag we are currently inside (may be on a later line).
     */
    static boolean inheader, waitforclose;

    /* Text of the heading collected so far.  Allocated eagerly so that
     * processString() can be used without going through main() first.
     */
    static StringBuffer linebuf = new StringBuffer(100);

    /* linedepth: how many lists are currently open in the output.
     * newdepth:  level (1-6) of the heading most recently opened.
     */
    static int linedepth = 0, newdepth = 0;

    /* Prints one line on standard output. */
    public static void println(String msg) {
        System.out.println(msg);
    }

    /* Prints a line in the "Table of contents" we are generating.
     * Currently, defaults to using [OL] tags.
     * It WILL CHANGE the current global 'linedepth',
     * to match global 'newdepth' (which is first capped at 6).
     */
    static void printContentLine(String line) {
        if (newdepth > MAX_DEPTH) {
            newdepth = MAX_DEPTH;
        }

        // Going deeper: open one list per missing level.
        while (linedepth < newdepth) {
            println(OPEN_LIST);
            linedepth++;
        }
        // Coming back up: close one list per surplus level.
        // (At most one of these two loops ever runs.)
        while (linedepth > newdepth) {
            println(CLOSE_LIST);
            linedepth--;
        }

        println(OPEN_ITEM + line + CLOSE_ITEM);
    }

    /* Scans one chunk of input (normally one line) for headings.
     *
     * If it finds an opening H[1-6] tag, it sets global newdepth to the
     * heading level and starts collecting text into linebuf.
     * If it finds a CLOSING Hxxx tag while collecting, it prints the saved
     * text at depth newdepth via printContentLine().
     *
     * All state is kept in the globals, so a tag or a heading may be split
     * over several calls.  null and empty strings are ignored.
     * Tag names are matched without regard to case.
     */
    static void processString(String currtok) {
        if (currtok == null || currtok.length() == 0) {
            return;
        }

        final int length = currtok.length();
        int pos = 0;

        // One pass over the chunk; every branch either advances pos or
        // returns, so the loop terminates.
        while (pos < length) {

            if (waitforclose) {
                /* just look for closing '>' */
                int close = currtok.indexOf('>', pos);
                if (close == -1) {
                    return;             // tag continues in the next chunk
                }
                waitforclose = false;
                pos = close + 1;
                continue;
            }

            if (inheader) {
                int end = indexOfIgnoreCase(currtok, "</h", pos);
                String piece = (end == -1)
                        ? currtok.substring(pos)
                        : currtok.substring(pos, end);

                if (piece.length() > 0) {
                    // linebuf can only be non-empty here when the heading
                    // started in an earlier chunk; the break between
                    // chunks was whitespace, so keep the words apart.
                    if (linebuf.length() > 0) {
                        linebuf.append(' ');
                    }
                    linebuf.append(piece);
                }
                if (end == -1) {
                    return;             // heading continues in the next chunk
                }

                printContentLine(linebuf.toString());
                linebuf.setLength(0);
                inheader = false;

                /* now skip to the closing '>', and then see
                 * if multiple headers are on a single line.
                 */
                waitforclose = true;
                pos = end + 1;
                continue;
            }

            /* We look for the start of a Header tag */
            int open = findHeaderOpen(currtok, pos);
            if (open == -1) {
                return;
            }
            newdepth = currtok.charAt(open + 2) - '0';
            inheader = true;
            waitforclose = true;        // attributes may precede the '>'
            pos = open + 3;
        }
    }

    /* Returns the index of the first "<h" + digit 1-6 at or after 'from'
     * that is followed by the end of the tag name, or -1 if there is none.
     * This skips look-alikes such as <html>, <head> and <hr>.
     */
    private static int findHeaderOpen(String text, int from) {
        int at = indexOfIgnoreCase(text, "<h", from);
        while (at != -1) {
            int levelPos = at + 2;
            if (levelPos < text.length()) {
                char level = text.charAt(levelPos);
                if (level >= '1' && level <= '6'
                        && endsTagName(text, levelPos + 1)) {
                    return at;
                }
            }
            at = indexOfIgnoreCase(text, "<h", at + 2);
        }
        return -1;
    }

    /* True if position 'pos' is past the end of 'text' or holds a
     * character that cannot be part of a tag name ('>', '/', whitespace).
     */
    private static boolean endsTagName(String text, int pos) {
        if (pos >= text.length()) {
            return true;
        }
        char c = text.charAt(pos);
        return c == '>' || c == '/' || Character.isWhitespace(c);
    }

    /* Like String.indexOf(String,int), but ignoring case. */
    private static int indexOfIgnoreCase(String text, String needle, int from) {
        int last = text.length() - needle.length();
        for (int i = Math.max(from, 0); i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    /* Feeds one tokenizer token to processString().
     * Numbers are passed as their integer text, words as-is;
     * every other token type is ignored.
     */
    static void processToken(int ttype, StreamTokenizer st) {
        String currtok;

        switch (ttype) {
            case StreamTokenizer.TT_NUMBER:
                currtok = Integer.toString((int) st.nval);
                break;
            case StreamTokenizer.TT_WORD:
                currtok = st.sval;
                break;
            default:
                return;
        }
        processString(currtok);
    }

    /* Debugging aid: prints a description of a number or word token.
     * Other token types print nothing.
     */
    static void printToken(int ttype, StreamTokenizer st) {
        switch (ttype) {
            case StreamTokenizer.TT_NUMBER:
                println("Number " + st.nval);
                println(" (stringnumber==" + st.sval + ")");
                break;
            case StreamTokenizer.TT_WORD:
                println("printToken: String=" + st.sval);
                break;
            default:
                break;
        }
    }

    /* Reads HTML from standard input line by line and writes the table
     * of contents to standard output.  Arguments are ignored.
     */
    public static void main(String args[]) {
        linebuf = new StringBuffer(100);
        inheader = false;
        waitforclose = false;

        BufferedReader buffreader =
                new BufferedReader(new InputStreamReader(System.in));

        // NOTE(review): this and the final println("") print an empty
        // line in the source as received.  If they once emitted wrapper
        // markup, that text is not recoverable from here -- confirm.
        println("");

        try {
            String line;
            while ((line = buffreader.readLine()) != null) {
                processString(line);
            }
        } catch (IOException err) {
            // Report on stderr so the message does not end up inside
            // the generated HTML.
            System.err.println("Got error reading token:\n" + err.getMessage());
        }

        // Close every list that is still open.
        newdepth = 0;
        while (linedepth > 0) {
            println(CLOSE_LIST);
            linedepth--;
        }
        println("");
    }
}