<?php
/**
 * @file xmlutils.php
 * XML helper utilities.
 *
 * If you need to output elements, never do simple string replacement
 * and then <xsl:value-of disable-output-escaping='yes' ... /> !
 * It's evil and you will definitely at least once forget to properly
 * escape it. Use these routines instead and then <xsl:copy-of ... />.
 *
 * Note that code in this file is real WTF, but I was not able to
 * create anything simpler without losing flexibility.
 * Tell me if you have some good idea how to make this code
 * shorter or more readable. Otherwise you can try to send this
 * code to http://www.thedailywtf.com :-)
 */

/*
    Easy PHP Framework

    Copyright (c) 2005 Michal Molhanec

    This software is provided 'as-is', without any express or implied
    warranty. In no event will the authors be held liable for any damages
    arising from the use of this software.

    Permission is granted to anyone to use this software for any purpose,
    including commercial applications, and to alter it and redistribute
    it freely, subject to the following restrictions:

    1. The origin of this software must not be misrepresented;
       you must not claim that you wrote the original software.
       If you use this software in a product, an acknowledgment
       in the product documentation would be appreciated but
       is not required.

    2. Altered source versions must be plainly marked as such,
       and must not be misrepresented as being the original software.

    3. This notice may not be removed or altered from any
       source distribution.
*/

/*
    NOTE: This is an altered version of the original source
    (see clause 2 of the license above).
*/

/**
 * Instances of this class serve for creating XML elements.
 * This class itself creates a
 * <a href='http://www.php.net/manual/en/ref.dom.php'>DOMElement</a>;
 * descendants of this class may create any other
 * <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
 * descendant.
 */
class ElementCreator {

    /// Element's name
    protected $name;

    /// Element's attributes. Keys are used as attribute names,
    /// values as their values.
    protected $attrs = array();

    /**
     * @param[in] $name  Element's name.
     * @param[in] $attrs Array of attributes or NULL. Keys are used as
     *                   attribute names, values as their values.
     */
    public function __construct($name, $attrs = NULL) {
        $this->name = $name;
        if ($attrs):
            $this->attrs = $attrs;
        endif;
    }

    /**
     * Call this to get a new XML element. This implementation creates a
     * <a href='http://www.php.net/manual/en/ref.dom.php'>DOMElement</a>
     * named after the name passed to the constructor, with the
     * constructor's attributes set on it.
     * @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
     * @param[in] $content String with element's content or NULL. NULL and
     *                     the empty string produce an element without
     *                     a text child.
     * @return The newly created element. It is not attached to the document tree.
     */
    public function create_element($doc, $content = NULL) {
        $elem = $doc->createElement($this->name);
        foreach ($this->attrs as $name => $value):
            $elem->setAttribute($name, $value);
        endforeach;
        // Compare explicitly instead of testing truthiness: the string "0"
        // is falsy in PHP but is perfectly valid element content.
        if ($content !== NULL && (string) $content !== ''):
            $contentNode = $doc->createTextNode($content);
            $elem->appendChild($contentNode);
        endif;
        return $elem;
    }

}

/**
 * ElementCreator descendant which runs a regular expression on the
 * passed content and uses only the first captured subpattern as the
 * content of the created element.
 */
class ExtractingElementCreator extends ElementCreator {

    /// Regular expression used for extracting
    protected $regex;

    /**
     * @see ElementCreator::__construct()
     * @param[in] $regex Regular expression used for extracting.
     * @param[in] $name  Element's name.
     * @param[in] $attrs Array of attributes or NULL.
     */
    public function __construct($regex, $name, $attrs = NULL) {
        $this->regex = $regex;
        parent::__construct($name, $attrs);
    }

    /**
     * Creates the XML element.
     * @see ElementCreator::create_element()
     * @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
     * @param[in] $content String content of the element. It's passed
     *                     to the <a href='http://www.php.net/manual/en/function.preg-match.php'>preg_match</a>
     *                     function and first captured parenthesized subpattern
     *                     is used as a real content of the element.
     *                     NULL is matched as an empty string. The parameter
     *                     keeps the parent's default so that the method
     *                     signature stays compatible with
     *                     ElementCreator::create_element().
     * @return Element created by ElementCreator::create_element().
     * @throw Exception If there is no match.
     */
    public function create_element($doc, $content = NULL) {
        $subject = ($content === NULL) ? '' : $content;
        if (preg_match($this->regex, $subject, $matches)):
            // The first group may be missing from $matches (regex without
            // a group, or an optional trailing group which did not
            // participate in the match); the element is then left empty.
            $extracted = isset($matches[1]) ? $matches[1] : NULL;
            return parent::create_element($doc, $extracted);
        endif;
        throw new Exception(
            sprintf(
                "ExtractingElementCreator::create_element() error: Nothing matched! Regex: >%s< String: >%s<",
                $this->regex, $content
            )
        );
    }

}

/**
 * Helper function for the xml_convert_any2elem() function.
 * It splits the string on a plain-string separator. Every occurrence
 * of the separator is replaced by a node obtained from $elemcreator
 * (called without content), the text between separators becomes
 * <a href='http://www.php.net/manual/en/ref.dom.php'>DOMText</a> nodes.
 * @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
 * @param[in] $text String which is going to be converted.
 * @param[in] $str Separator.
 * @param[in] $elemcreator Instance of ElementCreator class.
 * @return Array of DOMNodes. Empty text parts (e.g. between two adjacent
 *         separators) do not produce a DOMText node.
 */
function str_splitter($doc, $text, $str, $elemcreator) {
    $text_parts = explode($str, $text);
    $first = TRUE;
    $result = array();
    foreach ($text_parts as $text_part):
        // a separator stood before every part except the first one
        if ($first):
            $first = FALSE;
        else:
            $result[] = $elemcreator->create_element($doc);
        endif;
        if (strlen($text_part) > 0):
            $result[] = $doc->createTextNode($text_part);
        endif;
    endforeach;
    return $result;
}

/**
 * Helper function for the xml_convert_any2elem() function.
 * It splits the string with a regular expression. Parts which match
 * the regex are passed to $elemcreator as content, the other parts
 * become <a href='http://www.php.net/manual/en/ref.dom.php'>DOMText</a> nodes.
 * @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
 * @param[in] $text String which is going to be converted.
 * @param[in] $regex Regular expression used for splitting the string.
 *                   The string is split with PREG_SPLIT_DELIM_CAPTURE, so
 *                   only parenthesized parts of the delimiter are kept.
 *                   Note that if you need to use rounding brackets
 *                   inside of the regex you should mark them
 *                   <a href='http://www.php.net/manual/en/reference.pcre.pattern.syntax.php'>non-capturing (?:)</a>
 *                   otherwise strange things will happen.
 * @param[in] $elemcreator Instance of ElementCreator class.
 * @return Array of DOMNodes. Empty parts are skipped (PREG_SPLIT_NO_EMPTY).
 * @throw Exception If preg_split() fails (it returned FALSE).
 */
function regex_splitter($doc, $text, $regex, $elemcreator) {
    $text_parts = preg_split($regex, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
    // preg_split() returns FALSE on failure; report it instead of
    // silently iterating over a non-array.
    if ($text_parts === FALSE):
        throw new Exception(
            sprintf("regex_splitter() error: preg_split() failed! Regex: >%s<", $regex)
        );
    endif;
    $result = array();
    foreach ($text_parts as $text_part):
        if (preg_match($regex, $text_part)):
            $result[] = $elemcreator->create_element($doc, $text_part);
        else:
            $result[] = $doc->createTextNode($text_part);
        endif;
    endforeach;
    return $result;
}

/**
 * Converts string to an array of <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
 * descendants.
 * @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
 * @param[in] $text String which is going to be converted. It can be
 *                  also a DOMText, a DOMElement or an array of these
 *                  (so you can call this function on a value
 *                  returned by another call to this function).
 * @param[in] $str Anything (e.g. plain string or regex) which will be
 *                 used for splitting the text. This value will be passed
 *                 to the splitter function.
 * @param[in] $elemcreator Instance of ElementCreator class or string
 *                         which will be wrapped into a ElementCreator
 *                         instance (the string will be used as an elements
 *                         name).
 * @param[in] $splitter Function which will be used to split the string.
 *                      It is called as $splitter($doc, $text, $str, $elemcreator).
 *                      See str_splitter() and regex_splitter().
 * @param[in] $recurse If set to true then also text nodes which are
 *                     children of existing elements are processed
 *                     (the elements are modified in place).
 *                     Defaults to false.
 * @return Array of DOMNodes. An empty string yields a single empty DOMText,
 *         a DOMElement yields an array containing just that element.
 */
function xml_convert_any2elem($doc, $text, $str, $elemcreator, $splitter, $recurse = FALSE) {
    if (is_string($elemcreator)):
        $elemcreator = new ElementCreator($elemcreator);
    endif;

    // array: convert every item and concatenate the results
    if (is_array($text)):
        $result = array();
        foreach ($text as $part):
            $result = array_merge($result, xml_convert_any2elem($doc, $part, $str, $elemcreator, $splitter, $recurse));
        endforeach;
        return $result;
    endif;

    // text node: convert its own character data.
    // (wholeText would also include the text of adjacent text siblings,
    // which duplicates content when each sibling is converted in turn.)
    if ($text instanceof DOMText):
        return xml_convert_any2elem($doc, $text->data, $str, $elemcreator, $splitter, $recurse);
    endif;

    if ($text instanceof DOMElement):

        // recursion means that we are converting also content of already
        // created elements
        // we will convert childnodes, result of their conversion put into
        // an array and then replace childnodes with it
        if ($recurse):
            $result = array();
            // childNodes is a live list, so remember the current children
            // in a plain array; it is used below for removing them
            $old_children = array();
            foreach ($text->childNodes as $childNode):
                $old_children[] = $childNode;

                // text nodes convert
                if ($childNode instanceof DOMText):
                    $result = array_merge($result, xml_convert_any2elem($doc, $childNode, $str, $elemcreator, $splitter, $recurse));

                // element nodes recurse (they are modified in place)
                elseif ($childNode instanceof DOMElement):
                    xml_convert_any2elem($doc, $childNode, $str, $elemcreator, $splitter, $recurse);
                    $result[] = $childNode;

                // other nodes (e.g. comments) copy
                else:
                    assert($childNode instanceof DOMNode);
                    $result[] = $childNode;
                endif;

            endforeach;

            // now remove existing childnodes ...
            foreach ($old_children as $childNode):
                $text->removeChild($childNode);
            endforeach;

            // ... and add new ones
            foreach ($result as $newChild):
                assert($newChild instanceof DOMNode);
                $text->appendChild($newChild);
            endforeach;

        endif;

        // we have to return ourselves for a case that DOMElement
        // is in the array. see the "if (is_array($text))" part to know why
        // is this needed
        return array($text);
    endif;

    assert(is_string($text));

    if (strlen($text) == 0):
        return array($doc->createTextNode(''));
    endif;

    return $splitter($doc, $text, $str, $elemcreator);
}

/**
 * Shortcut for calling xml_convert_any2elem() with str_splitter().
 * @see xml_convert_any2elem()
 */
function xml_convert_str2elem($doc, $text, $str, $elemcreator, $recurse = FALSE) {
    return xml_convert_any2elem($doc, $text, $str, $elemcreator, 'str_splitter', $recurse);
}

/**
 * Shortcut for calling xml_convert_any2elem() with regex_splitter().
 * @see xml_convert_any2elem()
 */
function xml_convert_regex2elem($doc, $text, $regex, $elemcreator, $recurse = FALSE) {
    return xml_convert_any2elem($doc, $text, $regex, $elemcreator, 'regex_splitter', $recurse);
}