tomclegg.net |
character encoding bugs break Biblio / nusoap Posted December 12, 2010 Problem: MediaWiki Biblio extension fails.
The relevant error message, which is provided by nusoap but doesn't end up getting reported, is: XML error parsing SOAP payload on line 306: Invalid character This is caused by the confluence of two problems:
Horrible workaround for both problems at once: --- nusoap.php~ 2006-10-05 19:28:36.000000000 -0400 +++ nusoap.php 2010-12-12 21:29:11.000000000 -0500 @@ -5874,6 +5874,10 @@ $this->debug('No XML declaration'); } $this->debug('Entering soap_parser(), length='.strlen($xml).', encoding='.$encoding); + + $tried_fudge = false; + + xmlparse: // Create an XML parser - why not xml_parser_create_ns? $this->parser = xml_parser_create($this->xml_encoding); // Set the options for parsing the XML data. @@ -5888,6 +5892,16 @@ // Parse the XML file. if(!xml_parse($this->parser,$xml,true)){ + if (!$tried_fudge) { + $fudgexml = preg_replace ('{^<\?xml version="1.0"}', 'xml_encoding); + if (xml_parse ($fudgeparser, $fudgexml, true)) { + $xml = $fudgexml; + $tried_fudge = true; + goto xmlparse; + } + } + // Display an error message. $err = sprintf('XML error parsing SOAP payload on line %d: %s', xml_get_current_line_number($this->parser), --- Biblio.php~ 2006-10-05 19:28:36.000000000 -0400 +++ Biblio.php 2010-12-12 22:37:23.000000000 -0500 @@ -266,6 +266,7 @@ new nusoapclient($server_url, true, $proxyhost, $proxyport, $proxyusername, $proxypassword); + $client->decode_utf8 = false; $err = $client->getError(); if (!$err) { MediaWiki test block: <biblio> #Barash2010 pmid=20445623 #MultipleFluors pmid=15558047 #SmolkeNAR2010 pmid=20385591 #IntronSizeDist pmid=16980575 #Kangueane2004 pmid=15217358 #SmithReview2005 pmid=15956978 #SplicingNomenclature pmid=18688268 </biblio> Broken server response: HTTP/1.1 200 OK Date: Mon, 13 Dec 2010 02:04:11 GMT Server: Apache Content-length: 16028 Content-Type: text/xml; charset="UTF-8" Vary: Accept-Encoding Connection: close <?xml version="1.0"?> <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > <SOAP-ENV:Body><eSummaryResult xmlns="http://www.ncbi.nlm.nih.gov/soap/eutils/esummary"> |