var NAMESPACE = require("./conventions").NAMESPACE; //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] //[5] Name ::= NameStartChar (NameChar)* var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]"); var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$'); //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/ //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',') //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE var S_TAG = 0;//tag name offerring var S_ATTR = 1;//attr name offerring var S_ATTR_SPACE=2;//attr name end and space offer var S_EQ = 3;//=space? var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only) var S_ATTR_END = 5;//attr value end and no space(quot end) var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer) var S_TAG_CLOSE = 7;//closed el /** * Creates an error that will not be caught by XMLReader aka the SAX parser. * * @param {string} message * @param {any?} locator Optional, can provide details about the location in the source * @constructor */ function ParseError(message, locator) { this.message = message this.locator = locator if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError); } ParseError.prototype = new Error(); ParseError.prototype.name = ParseError.name function XMLReader(){ } XMLReader.prototype = { parse:function(source,defaultNSMap,entityMap){ var domBuilder = this.domBuilder; domBuilder.startDocument(); _copy(defaultNSMap ,defaultNSMap = {}) parse(source,defaultNSMap,entityMap, domBuilder,this.errorHandler); domBuilder.endDocument(); } } function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){ function fixedFromCharCode(code) { // String.prototype.fromCharCode does not supports // > 2 bytes unicode chars directly if (code > 0xffff) { code -= 0x10000; var surrogate1 = 0xd800 + (code >> 10) , surrogate2 = 0xdc00 + (code & 0x3ff); return String.fromCharCode(surrogate1, surrogate2); } else { return String.fromCharCode(code); } } function entityReplacer(a){ var k = a.slice(1,-1); if(k in entityMap){ return entityMap[k]; }else if(k.charAt(0) === '#'){ return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x'))) }else{ errorHandler.error('entity not found:'+a); return a; } } function appendText(end){//has some bugs if(end>start){ var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer); locator&&position(start); domBuilder.characters(xt,0,end-start); start = end } } function position(p,m){ while(p>=lineEnd && (m = linePattern.exec(source))){ lineStart = m.index; lineEnd = lineStart + m[0].length; locator.lineNumber++; //console.log('line++:',locator,startPos,endPos) } locator.columnNumber = p-lineStart+1; } var lineStart = 0; var lineEnd = 0; var linePattern = /.*(?:\r\n?|\n)|.*$/g var locator = domBuilder.locator; var parseStack = [{currentNSMap:defaultNSMapCopy}] var closeMap = {}; var start = 0; while(true){ try{ var tagStart = source.indexOf('<',start); if(tagStart<0){ if(!source.substr(start).match(/^\s*$/)){ var doc = domBuilder.doc; var text = doc.createTextNode(source.substr(start)); doc.appendChild(text); domBuilder.currentElement = text; } return; } if(tagStart>start){ appendText(tagStart); } switch(source.charAt(tagStart+1)){ case '/': var end = source.indexOf('>',tagStart+3); var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, ''); var config = parseStack.pop(); if(end<0){ tagName = source.substring(tagStart+2).replace(/[\s<].*/,''); errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName); end = tagStart+1+tagName.length; }else if(tagName.match(/\s locator&&position(tagStart); end = parseInstruction(source,tagStart,domBuilder); break; case '!':// start){ start = end; }else{ //TODO: 这里有可能sax回退,有位置错误风险 appendText(Math.max(tagStart,start)+1); } } } function copyLocator(f,t){ t.lineNumber = f.lineNumber; t.columnNumber = f.columnNumber; return t; } /** * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack); * @return end of the elementStartPart(end of elementEndPart for selfClosed el) */ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){ /** * @param {string} qname * @param {string} value * @param {number} startIndex */ function addAttribute(qname, value, startIndex) { if (el.attributeNames.hasOwnProperty(qname)) { errorHandler.fatalError('Attribute ' + qname + ' redefined') } el.addValue(qname, value, startIndex) } var attrName; var value; var p = ++start; var s = S_TAG;//status while(true){ var c = source.charAt(p); switch(c){ case '=': if(s === S_ATTR){//attrName attrName = source.slice(start,p); s = S_EQ; }else if(s === S_ATTR_SPACE){ s = S_EQ; }else{ //fatalError: equal must after attrName or space after attrName throw new Error('attribute equal must after attrName'); // No known test case } break; case '\'': case '"': if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE ){//equal if(s === S_ATTR){ errorHandler.warning('attribute value must after "="') attrName = source.slice(start,p) } start = p+1; p = source.indexOf(c,start) if(p>0){ value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); addAttribute(attrName, value, start-1); s = S_ATTR_END; }else{ //fatalError: no end quot match throw new Error('attribute value no end \''+c+'\' match'); } }else if(s == S_ATTR_NOQUOT_VALUE){ value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); //console.log(attrName,value,start,p) addAttribute(attrName, value, start); //console.dir(el) errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!'); start = p+1; s = S_ATTR_END }else{ //fatalError: no equal before throw new Error('attribute value must after "="'); // No known test case } break; case '/': switch(s){ case S_TAG: el.setTagName(source.slice(start,p)); case S_ATTR_END: case S_TAG_SPACE: case S_TAG_CLOSE: s =S_TAG_CLOSE; el.closed = true; case S_ATTR_NOQUOT_VALUE: case S_ATTR: case S_ATTR_SPACE: break; //case S_EQ: default: throw new Error("attribute invalid close char('/')") // No known test case } break; case ''://end document errorHandler.error('unexpected end of input'); if(s == S_TAG){ el.setTagName(source.slice(start,p)); } return p; case '>': switch(s){ case S_TAG: el.setTagName(source.slice(start,p)); case S_ATTR_END: case S_TAG_SPACE: case S_TAG_CLOSE: break;//normal case S_ATTR_NOQUOT_VALUE://Compatible state case S_ATTR: value = source.slice(start,p); if(value.slice(-1) === '/'){ el.closed = true; value = value.slice(0,-1) } case S_ATTR_SPACE: if(s === S_ATTR_SPACE){ value = attrName; } if(s == S_ATTR_NOQUOT_VALUE){ errorHandler.warning('attribute "'+value+'" missed quot(")!'); addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start) }else{ if(!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)){ errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!') } addAttribute(value, value, start) } break; case S_EQ: throw new Error('attribute value missed!!'); } // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName)) return p; /*xml space '\x20' | #x9 | #xD | #xA; */ case '\u0080': c = ' '; default: if(c<= ' '){//space switch(s){ case S_TAG: el.setTagName(source.slice(start,p));//tagName s = S_TAG_SPACE; break; case S_ATTR: attrName = source.slice(start,p) s = S_ATTR_SPACE; break; case S_ATTR_NOQUOT_VALUE: var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); errorHandler.warning('attribute "'+value+'" missed quot(")!!'); addAttribute(attrName, value, start) case S_ATTR_END: s = S_TAG_SPACE; break; //case S_TAG_SPACE: //case S_EQ: //case S_ATTR_SPACE: // void();break; //case S_TAG_CLOSE: //ignore warning } }else{//not space //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE switch(s){ //case S_TAG:void();break; //case S_ATTR:void();break; //case S_ATTR_NOQUOT_VALUE:void();break; case S_ATTR_SPACE: var tagName = el.tagName; if (!NAMESPACE.isHTML(currentNSMap['']) || !attrName.match(/^(?:disabled|checked|selected)$/i)) { errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!') } addAttribute(attrName, attrName, start); start = p; s = S_ATTR; break; case S_ATTR_END: errorHandler.warning('attribute space is required"'+attrName+'"!!') case S_TAG_SPACE: s = S_ATTR; start = p; break; case S_EQ: s = S_ATTR_NOQUOT_VALUE; start = p; break; case S_TAG_CLOSE: throw new Error("elements closed character '/' and '>' must be connected to"); } } }//end outer switch //console.log('p++',p) p++; } } /** * @return true if has new namespace define */ function appendElement(el,domBuilder,currentNSMap){ var tagName = el.tagName; var localNSMap = null; //var currentNSMap = parseStack[parseStack.length-1].currentNSMap; var i = el.length; while(i--){ var a = el[i]; var qName = a.qName; var value = a.value; var nsp = qName.indexOf(':'); if(nsp>0){ var prefix = a.prefix = qName.slice(0,nsp); var localName = qName.slice(nsp+1); var nsPrefix = prefix === 'xmlns' && localName }else{ localName = qName; prefix = null nsPrefix = qName === 'xmlns' && '' } //can not set prefix,because prefix !== '' a.localName = localName ; //prefix == null for no ns prefix attribute if(nsPrefix !== false){//hack!! if(localNSMap == null){ localNSMap = {} //console.log(currentNSMap,0) _copy(currentNSMap,currentNSMap={}) //console.log(currentNSMap,1) } currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value; a.uri = NAMESPACE.XMLNS domBuilder.startPrefixMapping(nsPrefix, value) } } var i = el.length; while(i--){ a = el[i]; var prefix = a.prefix; if(prefix){//no prefix attribute has no namespace if(prefix === 'xml'){ a.uri = NAMESPACE.XML; }if(prefix !== 'xmlns'){ a.uri = currentNSMap[prefix || ''] //{console.log('###'+a.qName,domBuilder.locator.systemId+'',currentNSMap,a.uri)} } } } var nsp = tagName.indexOf(':'); if(nsp>0){ prefix = el.prefix = tagName.slice(0,nsp); localName = el.localName = tagName.slice(nsp+1); }else{ prefix = null;//important!! localName = el.localName = tagName; } //no prefix element has default namespace var ns = el.uri = currentNSMap[prefix || '']; domBuilder.startElement(ns,localName,tagName,el); //endPrefixMapping and startPrefixMapping have not any help for dom builder //localNSMap = null if(el.closed){ domBuilder.endElement(ns,localName,tagName); if(localNSMap){ for(prefix in localNSMap){ domBuilder.endPrefixMapping(prefix) } } }else{ el.currentNSMap = currentNSMap; el.localNSMap = localNSMap; //parseStack.push(el); return true; } } function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){ if(/^(?:script|textarea)$/i.test(tagName)){ var elEndStart = source.indexOf('',elStartEnd); var text = source.substring(elStartEnd+1,elEndStart); if(/[&<]/.test(text)){ if(/^script$/i.test(tagName)){ //if(!/\]\]>/.test(text)){ //lexHandler.startCDATA(); domBuilder.characters(text,0,text.length); //lexHandler.endCDATA(); return elEndStart; //} }//}else{//text area text = text.replace(/&#?\w+;/g,entityReplacer); domBuilder.characters(text,0,text.length); return elEndStart; //} } } return elStartEnd+1; } function fixSelfClosed(source,elStartEnd,tagName,closeMap){ //if(tagName in closeMap){ var pos = closeMap[tagName]; if(pos == null){ //console.log(tagName) pos = source.lastIndexOf('') if(pos',start+4); //append comment source.substring(4,end)//