// PM20230220027_Web.git

/**
 * @author: Tobias Nickel
 * @date: 06.04.2015
 * I needed a small XML parser that can be used in a worker.
 */
 
/**
 * Parses XML / HTML into a lightweight DOM-like structure, with no validation
 * and some failure tolerance.
 *
 * Every element becomes a plain object `{ tagName, attributes, children }`:
 *   - `attributes` only exists when the tag carries at least one attribute;
 *     an attribute without a quoted value is stored as `null`.
 *   - `children` is missing for self-closing tags, otherwise it is an array of
 *     element objects and text strings (whitespace-only text is dropped).
 *   - the content of `script` and `style` elements is kept verbatim as a single
 *     text child and is not parsed.
 * Comments are skipped. Other `<!...>` declarations are skipped up to their
 * first '>' (so CDATA sections are not supported).
 * Truncated input (missing '>', closing quote, closing tag, ...) is parsed as
 * far as possible; the parser always terminates.
 *
 * @param S {string} your XML to parse
 * @param options {object} all other options:
 *            searchId {string} the id of a single element that should be returned.
 *                     Only that element is parsed, which is much faster.
 *                     `filter` and `simplify` are not applied in this mode.
 *            filter {function} filter method, as you know it from Array.filter,
 *                     but it walks through the whole DOM (see tXml.filter).
 *            simplify {bool} to use tXml.simplify (applied after `filter`).
 * @return {Array|object|null} the list of top-level children (possibly filtered /
 *         simplified); with `searchId` the matching element, or null if not found.
 */
function tXml(S, options) {
    "use strict";
    options = options || {};
    var openBracket = "<";
    var openBracketCC = "<".charCodeAt(0);
    var closeBracket = ">";
    var closeBracketCC = ">".charCodeAt(0);
    var minusCC = "-".charCodeAt(0);
    var slashCC = "/".charCodeAt(0);
    var exclamationCC = '!'.charCodeAt(0);
    var singleQuoteCC = "'".charCodeAt(0);
    var doubleQuoteCC = '"'.charCodeAt(0);
    // characters that end a tag name or an attribute name
    var nameSpacer = '\r\n\t>/= ';
    // tag names whose children are never parsed (currently none)
    var NoChildNodes = [/*'img', 'br', 'input', 'meta', 'link'*/];
    // current read position inside S, shared by all parse* helpers
    var pos = 0;
    var out = null;

    /**
     * true for the char codes of A-Z and a-z
     */
    function isLetter(code) {
        return (code > 64 && code < 91) || (code > 96 && code < 123);
    }

    /**
     * parsing a list of entries; stops at the end of the input or at the
     * closing tag of the parent, leaving pos on that tag's '>'
     */
    function parseChildren() {
        var children = [];
        while (S[pos]) {
            if (S.charCodeAt(pos) === openBracketCC) {
                if (S.charCodeAt(pos + 1) === slashCC) {
                    // closing tag of the parent element
                    pos = S.indexOf(closeBracket, pos);
                    if (pos === -1) {
                        // truncated closing tag: stop at the end instead of rewinding
                        pos = S.length;
                    }
                    return children;
                } else if (S.charCodeAt(pos + 1) === exclamationCC) {
                    if (S.charCodeAt(pos + 2) === minusCC) {
                        // comment support: jump from '>' to '>' until one is preceded by '--'
                        while (pos !== -1 && !(S.charCodeAt(pos) === closeBracketCC && S.charCodeAt(pos - 1) === minusCC && S.charCodeAt(pos - 2) === minusCC)) {
                            pos = S.indexOf(closeBracket, pos + 1);
                        }
                        if (pos === -1) {
                            // unterminated comment swallows the rest of the input
                            pos = S.length;
                        }
                    } else {
                        // doctype support: skip to the first '>'
                        pos += 2;
                        while (pos < S.length && S.charCodeAt(pos) !== closeBracketCC) {
                            pos++;
                        }
                    }
                    pos++;
                    continue;
                }
                children.push(parseNode());
            } else {
                var text = parseText();
                if (text.trim().length > 0) {
                    children.push(text);
                }
            }
            pos++;
        }
        return children;
    }

    /**
     * returns the text up to (not including) the next '<';
     * pos is left on the last character of the text
     */
    function parseText() {
        var start = pos;
        pos = S.indexOf(openBracket, pos) - 1;
        if (pos === -2) {
            // no further tag: the text runs to the end of the input
            pos = S.length;
        }
        return S.slice(start, pos + 1);
    }

    /**
     * returns the text up to the first name delimiter (or the end of the input)
     */
    function parseName() {
        var start = pos;
        while (pos < S.length && nameSpacer.indexOf(S[pos]) === -1) {
            pos++;
        }
        return S.slice(start, pos);
    }

    /**
     * returns the raw content of a script / style element as a one-entry
     * children list; pos is left on the '>' of the closing tag
     */
    function parseRawChildren(closingTag) {
        var start = pos + 1;
        var end = S.indexOf(closingTag, pos);
        if (end === -1) {
            // missing closing tag: everything that is left is content
            pos = S.length;
            return [S.slice(start)];
        }
        pos = end + closingTag.length - 1;
        return [S.slice(start, end)];
    }

    /**
     * is parsing a node, including tagName, attributes and its children;
     * to parse children it uses parseChildren again, that makes the parsing recursive.
     * expects pos on the '<' of the tag and leaves it on the '>' that ends the node
     */
    function parseNode() {
        var node = {};
        pos++;
        node.tagName = parseName();

        // parsing attributes
        var attrFound = false;
        while (pos < S.length && S.charCodeAt(pos) !== closeBracketCC) {
            if (isLetter(S.charCodeAt(pos))) {
                var name = parseName();
                // search the beginning of the value: a quote, or the next name / '>' if there is no value
                var code = S.charCodeAt(pos);
                while (pos < S.length && code !== singleQuoteCC && code !== doubleQuoteCC && !isLetter(code) && code !== closeBracketCC) {
                    pos++;
                    code = S.charCodeAt(pos);
                }
                if (!attrFound) {
                    node.attributes = {};
                    attrFound = true;
                }
                var value;
                if (code === singleQuoteCC || code === doubleQuoteCC) {
                    value = parseString();
                } else {
                    value = null;
                    // step back so the outer loop sees the character we stopped on
                    // (and a '/' right before '>' is still detected below)
                    pos--;
                }
                node.attributes[name] = value;
            }
            pos++;
        }

        // optional parsing of children (not for self-closing tags)
        if (S.charCodeAt(pos - 1) !== slashCC) {
            if (node.tagName == "script") {
                node.children = parseRawChildren('</script>');
            } else if (node.tagName == "style") {
                node.children = parseRawChildren('</style>');
            } else if (NoChildNodes.indexOf(node.tagName) == -1) {
                pos++;
                node.children = parseChildren();
            }
        }
        return node;
    }

    /**
     * is parsing a string that starts with a char and ends with the same, usually ' or ";
     * pos is left on the closing quote (or at the end of the input if it is missing)
     */
    function parseString() {
        var startChar = S[pos];
        var startpos = ++pos;
        pos = S.indexOf(startChar, startpos);
        if (pos === -1) {
            pos = S.length;
        }
        return S.slice(startpos, pos);
    }

    /**
     * returns the index of the id attribute with the value options.searchId, or -1.
     * the attribute has to be preceded by whitespace or a quote, so that
     * names like "data-id" are not mistaken for "id"
     */
    function findId() {
        // the id is matched literally, so regex meta characters have to be escaped
        var escapedId = String(options.searchId).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        var match = new RegExp('[\\s\'"]id\\s*=\\s*[\'"]' + escapedId + '[\'"]').exec(S);
        return match ? match.index : -1;
    }

    if (options.searchId) {
        pos = findId();
        if (pos !== -1) {
            // go back to the beginning of the tag that owns the attribute
            pos = S.lastIndexOf('<', pos);
            if (pos !== -1) {
                out = parseNode();
            }
        }
        return out;
    }

    out = parseChildren();

    if (options.filter) {
        out = tXml.filter(out, options.filter);
    }

    if (options.simplify) {
        out = tXml.simplify(out);
    }
    return out;
}
/**
 * Transforms the DOM object into an object that is like the result of PHP's simplexml_load_*() functions.
 * This format helps you to write code that is more likely to keep working, even if there are small changes in the XML schema.
 * Be aware that it is not possible to reproduce the original XML from a simplified version, because the order of elements is not saved.
 * In return your program will be more flexible and easier to read.
 *
 * Rules:
 *   - a missing children list (null / undefined) becomes `{}`
 *   - a list holding exactly one text string becomes `{ value: text }`
 *   - otherwise the children are grouped by tagName; every element is simplified
 *     recursively and gets its attributes (if any) as `_attributes`
 *   - a tagName that occurs only once maps directly to its entry, a tagName
 *     that occurs several times maps to an array of entries
 *   - text that is mixed with elements has no tagName and is therefore collected
 *     under the key "undefined" (kept for backward compatibility)
 *
 * @param {array} children the children list
 * @return {object} the simplified object
 */
tXml.simplify = function simplify(children) {
    var out = {};
    var hasOwn = Object.prototype.hasOwnProperty;

    if (children == undefined)
        return out;

    if (children.length === 1 && typeof children[0] == 'string')
        return {value: children[0]};

    // group each child under its tagName
    children.forEach(function(child) {
        var key = child.tagName;

        // Only own properties count, and the bucket is created with defineProperty:
        // tag names like "constructor", "toString" or "__proto__" must not be
        // confused with (or written through to) members of Object.prototype.
        if (!hasOwn.call(out, key)) {
            Object.defineProperty(out, key, {
                value: [],
                writable: true,
                enumerable: true,
                configurable: true
            });
        }

        if (typeof child == 'object') {
            var kids = tXml.simplify(child.children);
            out[key].push(kids);
            if (child.attributes) {
                kids._attributes = child.attributes;
            }
        } else {
            out[key].push(child);
        }
    });

    // unwrap the groups that contain a single entry
    Object.keys(out).forEach(function(key) {
        if (out[key].length == 1) {
            out[key] = out[key][0];
        }
    });

    return out;
};
 
/**
 * Behaves the same way as Array.filter, but walks the whole tree: every element
 * for which the filter method returns a truthy value is in the result list.
 * The result is flat and in document order (an element comes before its
 * descendants). Text strings are never passed to the filter method and never
 * appear in the result.
 *
 * @param children {Array} the children of a node; null / undefined is treated as an empty list
 * @param f {function} the filter method, called with one element at a time
 * @return {Array} the matching elements (the same objects, not copies)
 */
tXml.filter = function(children, f) {
    var out = [];

    // collects the matches of one level, then descends into each child
    function collect(list) {
        list.forEach(function(child) {
            if (typeof(child) === 'object' && f(child)) {
                out.push(child);
            }
            if (child.children) {
                collect(child.children);
            }
        });
    }

    // self-closing elements have no children list at all
    if (children != null) {
        collect(children);
    }
    return out;
};
/*
console.clear();
tXml(d,'content');
 //some testCode
var s = document.body.innerHTML.toLowerCase();
var start = new Date().getTime();
var o = tXml(s,'content');
var end = new Date().getTime();
//console.log(JSON.stringify(o,undefined,'\t'));
console.log("MILLISECONDS",end-start);
var nodeCount=document.querySelectorAll('*').length;
console.log('node count',nodeCount);
console.log("speed:",(1000/(end-start))*nodeCount,'Nodes / second')
//console.log(JSON.stringify(tXml('<html><head><title>testPage</title></head><body><h1>TestPage</h1><p>this is a <b>test</b>page</p></body></html>'),undefined,'\t'));
var p = new DOMParser();
var s2='<body>'+s+'</body>'
var start2= new Date().getTime();
var o2 = p.parseFromString(s2,'text/html').querySelector('#content')
var end2=new Date().getTime();
console.log("MILLISECONDS",end2-start2);
// */