001    import java.io.IOException;
002    
003    //Hier werden nur Standardklassen und Interfaces importiert
004    import org.w3c.dom.*;
005    import org.w3c.dom.bootstrap.DOMImplementationRegistry;
006    import org.w3c.dom.ls.DOMImplementationLS;
007    import org.w3c.dom.ls.LSParser;
008    
/**
 * This class provides static methods to load, analyse and display the DOM
 * structure of an XML document.
 * <p>
 * {@link #main(String[])} parses a document through the DOM Level 3 Load and
 * Save API; {@link #process(Node)} prints a description of a node and of
 * everything reachable from it to standard output.
 *
 * @author Udo Altmann
 * @version 2.0 (2005)
 */
public class DOMAnalyse {

    /**
     * Parses the XML document named by the first argument and prints its DOM
     * tree to standard output.
     * <p>
     * Before parsing, the names and values of all parser configuration
     * parameters are listed. If no argument is given, a usage message is
     * written to standard error and the method returns without parsing.
     *
     * @param args
     *            {@code args[0]} is the URI of the file to be analysed
     * @throws IOException
     *             declared in the signature; kept for compatibility with
     *             existing callers
     * @throws IllegalAccessException
     *             propagated from {@code DOMImplementationRegistry.newInstance()}
     * @throws InstantiationException
     *             propagated from {@code DOMImplementationRegistry.newInstance()}
     * @throws ClassNotFoundException
     *             propagated from {@code DOMImplementationRegistry.newInstance()}
     * @throws ClassCastException
     *             propagated from {@code DOMImplementationRegistry.newInstance()}
     * @throws IllegalStateException
     *             if the registry offers no implementation with the "LS" feature
     */
    public static void main(String args[]) throws IOException,
            ClassCastException, ClassNotFoundException, InstantiationException,
            IllegalAccessException {

        // Without a document URI there is nothing to analyse.
        if (args == null || args.length < 1) {
            System.err.println("Usage: java DOMAnalyse <uri-of-xml-document>");
            return;
        }

        // Select the DOM Level 3 implementation source
        // (an org.w3c.dom.DOMImplementationSource). The named class has to be
        // loadable at run time, otherwise newInstance() below fails.
        System.setProperty(DOMImplementationRegistry.PROPERTY,
                "org.apache.xerces.dom.DOMImplementationSourceImpl");

        // Class from org.w3c.dom.bootstrap
        DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();

        // "LS" = Load and Save
        DOMImplementationLS impl = (DOMImplementationLS) registry
                .getDOMImplementation("LS");
        if (impl == null) {
            throw new IllegalStateException(
                    "No DOM implementation supporting the \"LS\" (Load and Save) feature is available");
        }

        LSParser parser = impl.createLSParser(
                DOMImplementationLS.MODE_SYNCHRONOUS, null);
        DOMConfiguration conf = parser.getDomConfig();

        // Set various parser parameters.
        conf.setParameter("validate-if-schema", Boolean.TRUE);
        conf.setParameter("cdata-sections", Boolean.TRUE);
        conf.setParameter("entities", Boolean.TRUE);

        DOMStringList parameterNames = conf.getParameterNames();
        System.out.println("Parameters for parsing:");
        for (int i = 0; i < parameterNames.getLength(); i++) {
            String parameterName = parameterNames.item(i);
            System.out.println("\t" + parameterName + ": "
                    + conf.getParameter(parameterName));
        }

        // Processing starts with the Document interface; documents are
        // nodes as well.
        Document doc = parser.parseURI(args[0]);
        process(doc);
    }

    /**
     * Prints a description of a node to standard output and then processes
     * its child nodes recursively.
     * <p>
     * The output consists of the numeric and symbolic node type, the node
     * name, the node value, the parent (if any) and details specific to the
     * node type. Attributes of elements as well as entities and notations of
     * document types are processed recursively too.
     *
     * @param nd
     *            Node to be analysed. Child nodes are processed recursively
     */
    public static void process(Node nd) {
        short type = nd.getNodeType();

        System.out.println("Node type:\t" + type + " - "
                + nodeTypeToString(type));
        System.out.println("\tname:\t" + nd.getNodeName());
        System.out.println("\tvalue:\t\"" + nd.getNodeValue() + "\"");

        Node parent = nd.getParentNode();
        if (parent != null) {
            System.out.println("\tparent:\t"
                    + nodeTypeToString(parent.getNodeType()) + "\t"
                    + parent.getNodeName());
        } else {
            System.out.println("\tno parent\t");
        }

        System.out.println("\tspecific details:\t");
        printSpecificDetails(nd, type);

        if (nd.hasChildNodes()) {
            NodeList children = nd.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                process(children.item(i));
            }
        }
    }

    /**
     * Prints the details that only exist for the concrete node type.
     */
    private static void printSpecificDetails(Node nd, short type) {
        switch (type) {
        case Node.DOCUMENT_NODE: {
            Document doc = (Document) nd;
            // A document without a root element has no document element;
            // print "null" like every other absent value in this dump.
            Element root = doc.getDocumentElement();
            String rootName = (root == null) ? null : root.getNodeName();
            System.out.println("\t\tDocumentElement: " + rootName);
            break;
        }
        case Node.ELEMENT_NODE: {
            Element el = (Element) nd;
            System.out.println("\t\tgetTagName: " + el.getTagName());
            System.out.println("\t\tgetLocalName: " + el.getLocalName());
            System.out.println("\t\tgetNamespaceURI: " + el.getNamespaceURI());
            // hasAttributes exists since DOM Level 2 (for elements)
            if (el.hasAttributes()) {
                System.out.println("\t\tAttribute:");
                processAll(el.getAttributes());
            } else {
                System.out.println("\t\tno attributes!");
            }
            break;
        }
        case Node.ATTRIBUTE_NODE: {
            Attr at = (Attr) nd;
            System.out.println("\t\tgetName:" + at.getName());
            System.out.println("\t\tspecified: " + at.getSpecified());
            System.out.println("\t\tgetValue: " + at.getValue());
            System.out.println("\t\tgetLocalName: " + at.getLocalName());
            System.out.println("\t\tgetNamespaceURI: " + at.getNamespaceURI());
            break;
        }
        case Node.COMMENT_NODE: {
            Comment cm = (Comment) nd;
            System.out.println("\t\tgetData: " + cm.getData());
            System.out.println("\t\tgetLength: " + cm.getLength());
            break;
        }
        case Node.TEXT_NODE:
        case Node.CDATA_SECTION_NODE: {
            // CDATA sections are handled through the Text interface as well.
            Text tx = (Text) nd;
            System.out.println("\t\tgetData: " + tx.getData());
            System.out.println("\t\tgetLength: " + tx.getLength());
            break;
        }
        case Node.DOCUMENT_TYPE_NODE: {
            DocumentType dt = (DocumentType) nd;
            System.out.println("\t\tgetName: " + dt.getName());
            System.out.println("\t\tgetPublicId: " + dt.getPublicId());
            System.out.println("\t\tgetSystemId: " + dt.getSystemId());
            // DOM Level 2 and parser dependent
            System.out.println("\t\tgetInternalSubset: "
                    + dt.getInternalSubset());
            processAllOrReport(dt.getEntities(), "\t\tno Entities");
            processAllOrReport(dt.getNotations(), "\t\tno Notations");
            break;
        }
        case Node.NOTATION_NODE: {
            Notation no = (Notation) nd;
            System.out.println("\t\tgetPublicId: " + no.getPublicId());
            System.out.println("\t\tgetSystemId: " + no.getSystemId());
            break;
        }
        case Node.ENTITY_NODE: {
            Entity en = (Entity) nd;
            System.out.println("\t\tgetNotationName: " + en.getNotationName());
            System.out.println("\t\tgetPublicId: " + en.getPublicId());
            System.out.println("\t\tgetSystemId: " + en.getSystemId());
            break;
        }
        case Node.PROCESSING_INSTRUCTION_NODE: {
            ProcessingInstruction pi = (ProcessingInstruction) nd;
            System.out.println("\t\tgetTarget: " + pi.getTarget());
            System.out.println("\t\tgetData: " + pi.getData());
            break;
        }
        // The following node types have no special attributes/methods.
        // Entity references may not show up at all if they are replaced
        // during parsing.
        case Node.ENTITY_REFERENCE_NODE:
        case Node.DOCUMENT_FRAGMENT_NODE:
        default:
            System.out.println("\t\tno details!");
        }
    }

    /**
     * Processes every node of the map in index order.
     */
    private static void processAll(NamedNodeMap map) {
        for (int i = 0; i < map.getLength(); i++) {
            process(map.item(i));
        }
    }

    /**
     * Processes every node of the map, or prints the given message if the map
     * is missing or empty.
     */
    private static void processAllOrReport(NamedNodeMap map, String emptyMessage) {
        if (map == null || map.getLength() <= 0) {
            System.out.println(emptyMessage);
        } else {
            processAll(map);
        }
    }

    /**
     * Static method to convert node types to Strings.
     *
     * @param s
     *            Type as short (one of the {@code Node.*_NODE} constants)
     * @return name of the matching {@code Node} constant, or {@code null} if
     *         the value is none of the twelve node types handled here
     */
    public static String nodeTypeToString(short s) {
        switch (s) {
        case Node.ATTRIBUTE_NODE:
            return "ATTRIBUTE_NODE";
        case Node.CDATA_SECTION_NODE:
            return "CDATA_SECTION_NODE";
        case Node.COMMENT_NODE:
            return "COMMENT_NODE";
        case Node.DOCUMENT_FRAGMENT_NODE:
            return "DOCUMENT_FRAGMENT_NODE";
        case Node.DOCUMENT_NODE:
            return "DOCUMENT_NODE";
        case Node.DOCUMENT_TYPE_NODE:
            return "DOCUMENT_TYPE_NODE";
        case Node.ELEMENT_NODE:
            return "ELEMENT_NODE";
        case Node.ENTITY_NODE:
            return "ENTITY_NODE";
        case Node.ENTITY_REFERENCE_NODE:
            return "ENTITY_REFERENCE_NODE";
        case Node.NOTATION_NODE:
            return "NOTATION_NODE";
        case Node.PROCESSING_INSTRUCTION_NODE:
            return "PROCESSING_INSTRUCTION_NODE";
        case Node.TEXT_NODE:
            return "TEXT_NODE";
        default:
            // Unknown type codes are reported as null, as before.
            return null;
        }
    }

}