/*
* This program does a rough, hand-rolled scan of an HTML file,
* looking for header tags, and generates a table of contents
* based on them.
* It will ignore everything other than headers.
*
* version 1.1
*/
import java.io.*;
public class jhtmlindex {
static boolean inheader, waitforclose;
static StringBuffer linebuf;
static int linedepth=0, newdepth=0;
/* the static fields above hold the scanner state shared by the methods below */
/**
 * Convenience wrapper: writes {@code msg} plus a line terminator to
 * standard output.
 *
 * @param msg the text to print
 */
public static void println(String msg){
    final PrintStream stdout = System.out;
    stdout.println(msg);
}
/* Prints one entry of the table of contents we are generating.
* The original note says output currently defaults to using [OL] tags.
*
* Parameter: line - the entry text; it is concatenated into the final
* println() call of this method.
*
* Side effects on the globals:
*   - newdepth is clamped to at most 6 before anything is printed;
*   - linedepth is stepped up or down (one println() per step) until it
*     matches newdepth, so it WILL CHANGE.
*
* NOTE(review): the header of the first while loop below looks truncated,
* and the string literals handed to println() are empty. Presumably the
* HTML tag text was stripped when this file was extracted - confirm
* against a pristine copy before relying on this method.
*/
static void printContentLine(String line){
// 6 is the deepest level handled (processString documents H[1-6])
if(newdepth>6) newdepth=6;
// NOTE(review): the next line is damaged; presumably it looped while
// linedepth was below newdepth and printed an opening list tag - TODO confirm
while(linedepth");
linedepth++;
}
// there is an implied "else" here
while(linedepth>newdepth){
println("");
linedepth--;
}
println(""+line+"");
}
/* Given a string (a whole input line, or the unprocessed tail of one),
* hand-divide it up. sigh.
* if it finds a H[1-6] header, set global newdepth to match int value.
* If it finds a CLOSING Hxxx header, print out saved line at
* depth newdepth via printContentLine()
*
* The method is driven by two global flags and recurses on the remainder
* of the string after each piece it consumes:
*   - waitforclose: skip everything up to and including the next closing
*     angle bracket, clear the flag, then recurse on what follows. If no
*     bracket is in this string, return and keep waiting.
*   - inheader: text before the closing header tag is appended to linebuf,
*     the buffer is printed via printContentLine() and emptied, inheader
*     is cleared, waitforclose is set, and the rest is processed.
*   - neither: scan for the start of a header tag.
*
* A null or empty string is ignored.
*
* NOTE(review): two statements in this method (the indexOf() call in the
* inheader branch and the one inside the while(true) loop) are truncated,
* and the braces no longer balance. Presumably tag text inside string
* literals was stripped when this file was extracted. What happens when no
* closing header tag is found on the current line, and how newdepth and
* inheader get set, cannot be read from this copy - confirm against a
* pristine source.
*/
static void processString(String currtok){
int currpos=0, close=0;
// tolerate null (for example an end-of-input readLine() result)
if(currtok==null) return;
//Println("processString: string="+currtok);
// the recursive calls below pass an empty string once a line is used up
if(currtok.length()==0) return;
/* just look for closing '>' */
if(waitforclose){
close=currtok.indexOf('>');
if(close==-1){
return;
}
waitforclose=false;
/* if at end, will pass "empty" string */
processString(currtok.substring(close+1));
return;
}
// NOTE(review): the first statement inside the next branch is damaged in
// this copy; presumably it searched for the closing header tag and handled
// the not-found case before the append - TODO confirm
if(inheader){ /* && !waitforclose */
currpos=currtok.indexOf("0){
linebuf.append(currtok.substring(0,currpos));
}
printContentLine(linebuf.toString());
linebuf.setLength(0);
inheader=false;
/* now recurse to check for closing '>', and then see
* if multiple headers are on a single line.
* YUK!
*/
waitforclose=true;
processString( currtok.substring(currpos+1) );
return;
}
/**** ... else... We look for the start of a Header tag */
currpos=0;
while(true){
int tmpdepth=0,scanpos;
scanpos=currtok.indexOf("',currpos);
if(close == -1){
waitforclose=true;
return;
}
/* process any remaining part of string */
processString(currtok.substring(close+1));
return;
}
/**
 * Converts one tokenizer token to text and hands it to processString().
 * Number tokens are truncated to an int and rendered as decimal text;
 * word tokens are passed through as-is; every other token type is
 * silently dropped.
 *
 * @param ttype the token type code to dispatch on
 * @param st    the tokenizer whose nval / sval hold the token value
 */
static void processToken(int ttype,StreamTokenizer st){
    if(ttype==StreamTokenizer.TT_NUMBER){
        // the fractional part of the number is discarded by the cast
        processString(Integer.toString((int)st.nval));
    } else if(ttype==StreamTokenizer.TT_WORD){
        processString(st.sval);
    }
    // anything else: nothing to process
}
/**
 * Debugging aid: dumps a description of one tokenizer token via println().
 * Number tokens print their numeric value and then the tokenizer's sval
 * field; word tokens print their string value; other token types print
 * nothing.
 *
 * @param ttype the token type code to dispatch on
 * @param st    the tokenizer whose nval / sval hold the token value
 */
static void printToken(int ttype, StreamTokenizer st){
    if(ttype==StreamTokenizer.TT_NUMBER){
        println("Number "+st.nval);
        println(" (stringnumber=="+st.sval+")");
        return;
    }
    if(ttype==StreamTokenizer.TT_WORD){
        println("printToken: String="+st.sval);
    }
}
/**
 * Entry point. Reads standard input one line at a time, feeds each line
 * to processString(), and afterwards emits one closing line per list
 * level that is still open, followed by a final line.
 *
 * Changes from the previous revision (output is unchanged):
 *   - the StreamTokenizer that was built and configured but never read
 *     from has been removed, together with the unused locals and the
 *     commented-out token loop. processToken() and printToken() remain
 *     available for a token-based approach.
 *   - the read loop stops at end of input and no longer hands a null
 *     line to processString().
 *   - the closing loop leaves linedepth at 0, not -1.
 *
 * NOTE(review): the println("") calls print empty strings. Presumably they
 * once carried the opening and closing HTML for the generated list and the
 * tag text was lost when this file was extracted - confirm against a
 * pristine copy.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String args[]) {
    /* reset the scanner state shared with processString() */
    linebuf=new StringBuffer(100);
    inheader=false;
    waitforclose=false;

    BufferedReader buffreader=new BufferedReader(new InputStreamReader(System.in));

    println("");
    try {
        /* There are TWO ways of doing this.
         * One way is to generate our own indenting.
         * The other is to simply not print anything
         * except header stuff, and let the H stuff
         * do its own formatting.
         */
        String line;
        while((line=buffreader.readLine())!=null){
            processString(line);
        }
    } catch (IOException err){
        println("Got error reading token:\n"+err.getMessage());
    }

    /* unwind whatever nesting is still open */
    newdepth=0;
    while(linedepth>0){
        println("");
        linedepth--;
    }
    println("");
}
}