Updated 3 Sep 1999
This is a Java version of HTML Tidy Release 26 Jul
1999 Copyright © 1999 W3C, see
Tidy.java
for the copyright notice.
I have made available:
To use the Tidy Java Bean, just include JTidy\lib\Tidy.jar
in
your classpath.
To build Tidy from the source, you need a Java compiler/runtime
environment, supporting Java 1.1 or higher. First, download and
expand the archive. For Win 9x/NT, build it using the batch file
JTidy\make\build.bat
as follows:
cd JTidy\make
build c: 26jul1999
Where c:
is the root where you expanded the JTidy archive,
and
26jul1999
is the directory under JTidy\src
where the source is located.
NOTE: build.bat
assumes that the environment variable
java_home
points to your JDK installation, and that the
JDK tools are in your path.
For Unix environments, follow the procedure in build.bat.
Sorry about that, I will try to get a more generic build procedure
going in the future.
The main class is: org.w3c.tidy.Tidy
docTypeStr
to
docType
and handled the same as configuration file 'doctype'
string. Fixed potential IndexOutOfBoundsExceptions
in
Clean.createProps.
InputStreamName
property to Bean.
Tried speed optimization
in Lexer.wstrcasecmp
.
import java.io.IOException;
import java.net.URL;
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.FileWriter;
import org.w3c.tidy.Tidy;

/**
 * This program shows how HTML could be tidied directly from
 * a URL stream, and running on separate threads. Note the use
 * of the 'parse' method to parse from an InputStream, and send
 * the pretty-printed result to an OutputStream.
 * In this example thread th1 outputs XML, and thread th2 outputs
 * HTML. This shows that properties are per instance of Tidy.
 */
public class Test16 implements Runnable {

    /** Address of the document to fetch and tidy. */
    private final String url;

    /** File that receives the pretty-printed result. */
    private final String outFileName;

    /** File that receives whatever Tidy writes to its error writer. */
    private final String errOutFileName;

    /** Value passed to {@code Tidy.setXmlOut}. */
    private final boolean xmlOut;

    /**
     * Creates one tidy job.
     *
     * @param url            address of the document to read
     * @param outFileName    file to write the tidied document to
     * @param errOutFileName file to write Tidy's error output to
     * @param xmlOut         value handed to {@code Tidy.setXmlOut}
     */
    public Test16(String url, String outFileName,
                  String errOutFileName, boolean xmlOut) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
        this.xmlOut = xmlOut;
    }

    /**
     * Opens the error file, the URL stream and the output file (in that
     * order), runs {@code Tidy.parse} on them, and reports any
     * {@link IOException} on {@code System.out}. Every stream that was
     * opened is closed again before the method returns, whether or not
     * parsing succeeded.
     */
    public void run() {
        PrintWriter errOut = null;
        BufferedInputStream in = null;
        FileOutputStream out = null;

        Tidy tidy = new Tidy();
        tidy.setXmlOut(xmlOut);

        try {
            errOut = new PrintWriter(new FileWriter(errOutFileName), true);
            tidy.setErrout(errOut);
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            out = new FileOutputStream(outFileName);
            tidy.parse(in, out);
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        } finally {
            // Release the resources independently so that a failure to
            // close one of them does not prevent closing the others.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    System.out.println(this.toString() + e.toString());
                }
            }
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    System.out.println(this.toString() + e.toString());
                }
            }
            if (errOut != null) {
                // PrintWriter.close() does not throw.
                errOut.close();
            }
        }
    }

    /**
     * Starts two jobs on separate threads: the first with XML output
     * switched on, the second with it switched off.
     *
     * @param args url1, outFile1, errFile1, url2, outFile2, errFile2
     */
    public static void main(String[] args) {
        if (args.length < 6) {
            System.err.println(
                "Usage: java Test16 <url1> <outFile1> <errFile1>"
                + " <url2> <outFile2> <errFile2>");
            return;
        }

        Test16 t1 = new Test16(args[0], args[1], args[2], true);
        Test16 t2 = new Test16(args[3], args[4], args[5], false);
        Thread th1 = new Thread(t1);
        Thread th2 = new Thread(t2);
        th1.start();
        th2.start();
    }
}
import java.io.IOException; import java.io.FileInputStream; import org.w3c.tidy.Tidy; import org.w3c.tidy.Node; /** * This program shows how to use Tidy as an HTML parser. * It creates an instance of Tidy, calls the parse method * to parse a file input stream, and dumps a text representation * of the parse tree to System.out. */ public class Test17 { private static final String spaces = " "; private static void dump(Node node, int indent) { String prefix = spaces.substring(0, indent); Node n = node; while (n != null) { System.out.println( prefix + "------Node-------"); switch (node.type) { case Node.RootNode: System.out.println( prefix + "type: RootNode"); break; case Node.DocTypeTag: System.out.println( prefix + "type: DocTypeTag"); break; case Node.CommentTag: System.out.println( prefix + "type: CommentTag"); break; case Node.ProcInsTag: System.out.println( prefix + "type: ProcInsTag"); break; case Node.TextNode: System.out.println( prefix + "type: TextNode"); String v = n.getNodeValue(); if (v != null) { System.out.println( prefix + "value: " + v); } else { System.out.println( prefix + "value: null"); } break; case Node.StartTag: System.out.println( prefix + "type: StartTag"); break; case Node.EndTag: System.out.println( prefix + "type: EndTag"); break; case Node.StartEndTag: System.out.println( prefix + "type: StartEndTag"); break; case Node.AspTag: System.out.println( prefix + "type: AspTag"); break; default: System.out.println( prefix + "invalid type"); break; } if (n.element != null) { System.out.println( prefix + "element: " + n.element); } dump(n.content, indent+4); n = n.next; } } public static void main( String[] args ) { FileInputStream in; Tidy tidy = new Tidy(); Node root = null; try { in = new FileInputStream(args[0]); tidy.setMakeClean(true); root = tidy.parse(in, null); dump(root, 0); } catch ( IOException e ) { System.out.println( e.toString() ); } } }
Send questions, comments, or bug reports to Andy Quick.