xmlutils.php

Go to the documentation of this file.
00001 <?php
00002   /**
00003    *  @file xmlutils.php
00004    *  XML helper utilities.
00005    *
00006    *  If you need to output elements, never do simple string replacement
00007    *  and then <xsl:value-of disable-output-escaping='yes' ... /> !
00008    *  It's evil and you will definitely at least once forget to properly
00009    *  escape it. Use these routines instead and then <xsl:copy-of ... />.
00010    *
00011    *  Note that code in this file is real WTF, but I was not able to
00012    *  create anything simpler without losing flexibility.
00013    *  Tell me if you have some good idea how to make this code
00014    *  shorter or more readable. Otherwise you can try to send this
00015    *  code to http://www.thedailywtf.com :-)
00016    */
00017    
00018   /*
00019   Easy PHP Framework
00020 
00021   Copyright (c) 2005 Michal Molhanec
00022 
00023   This software is provided 'as-is', without any express or implied
00024   warranty. In no event will the authors be held liable for any damages
00025   arising from the use of this software.
00026 
00027   Permission is granted to anyone to use this software for any purpose,
00028   including commercial applications, and to alter it and redistribute
00029   it freely, subject to the following restrictions:
00030 
00031       1. The origin of this software must not be misrepresented;
00032          you must not claim that you wrote the original software.
00033          If you use this software in a product, an acknowledgment
00034          in the product documentation would be appreciated but
00035          is not required.
00036 
00037       2. Altered source versions must be plainly marked as such,
00038          and must not be misrepresented as being the original software.
00039 
00040       3. This notice may not be removed or altered from any
00041          source distribution.
00042   */
00043 
00044   /**
00045    *  Instances of this class serves for creating
00046    *  XML elements. To be exact, you can create
00047    *  any <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00048    *  descendant in this class descendants.
00049    */
00050   class ElementCreator {
00051 
00052     /// Element's name
00053     protected $name;
00054     
00055     /// Element's attributes. Keys are used as attribute names,
00056     /// values as their values.
00057     protected $attrs = array();
00058 
00059     /**
00060      *  @param[in] $name Elements name.
00061      *  @param[in] $attrs Array of attributes or NULL. Keys are used as
00062      *                    attribute names, values as their values.
00063      */
00064     function __construct($name, $attrs = NULL) {
00065       $this->name = $name;
00066       if ($attrs):
00067         $this->attrs = $attrs;
00068       endif;
00069     }
00070     
00071     /**
00072      *  Call this to get new XML element. By default it creates a
00073      *  <a href='http://www.php.net/manual/en/ref.dom.php'>DOMElement</a>
00074      *  but descendants can return arbitrary <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00075      *  descendant.
00076      *  @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
00077      *  @param[in] $content String with element's content or NULL.
00078      *  @return <a href='http://www.php.net/manual/en/ref.dom.php'>DOMElement</a> instance or
00079      *          other <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00080      *          descendant in descendants of this class.
00081      */
00082     function create_element($doc, $content = NULL) {
00083       $elem = $doc->createElement($this->name);
00084       foreach ($this->attrs as $name => $value):
00085         $elem->setAttribute($name, $value);
00086       endforeach;
00087       if ($content):
00088         $contentNode = $doc->createTextNode($content);
00089         $elem->appendChild($contentNode);
00090       endif;
00091       return $elem;
00092     }
00093     
00094   }
00095   
00096   /**
00097    *  ElementCreator descendant which allows you to set regex for
00098    *  extracting part of passed content text to use for creating the
00099    *  element. I guess that nobody have understood previous sentence.
00100    */
00101   class ExtractingElementCreator extends ElementCreator {
00102   
00103     /// Regular expression used for extracting
00104     protected $regex;
00105     
00106     /**
00107      *  @see ElementCreator::__construct()
00108      *  @param[in] $regex Regular expression used for extracting.
00109      */
00110     function __construct($regex, $name, $attrs = NULL) {
00111       $this->regex = $regex;
00112       parent::__construct($name, $attrs);
00113     }
00114     
00115     /**
00116      *  Creates the XML element.
00117      *  @see ElementCreator::create_element()
00118      *  @param[in] $content String content of the element. It's passed
00119      *                      to the <a href='http://www.php.net/manual/en/function.preg-match.php'>preg_match</a>
00120      *                      function and first captured parenthesized subpattern
00121      *                      is used as a real content of the element.
00122      *  @throw Exception If there is no match.
00123      */
00124     function create_element($doc, $content) {
00125       if (preg_match($this->regex, $content, $matches)):
00126         return parent::create_element($doc, $matches[1]);
00127       endif;
00128       throw new Exception(
00129         sprintf(
00130           "ExtractingElementCreator::create_element() error: Nothing matched! Regex: >%s< String: >%s<",
00131           $this->regex, $content
00132         )
00133       );
00134     }
00135     
00136   }
00137 
00138   /**
00139    *  Helper function for the xml_convert_any2elem() function.
00140    *  It splits the string into an array of <a href='http://www.php.net/manual/en/ref.dom.php'>DOMText</a>
00141    *  and <a href='http://www.php.net/manual/en/ref.dom.php'>DOMElement</a>
00142    *  (or other <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00143    *  descendant) instances.
00144    *  @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
00145    *  @param[in] $text String which is going to be converted.
00146    *  @param[in] $str Separator.
00147    *  @param[in] $elemcreator Instance of ElementCreator class.
00148    *  @return Array of DOMNodes, there is always at least one DOMText instance.
00149    */
00150   function str_splitter($doc, $text, $str, $elemcreator) {
00151     $text_parts = explode($str, $text);
00152     $first = TRUE;
00153     $result = array();
00154     foreach ($text_parts as $text_part):
00155       if ($first):
00156         $first = FALSE;
00157       else:
00158         $result[] = $elemcreator->create_element($doc);
00159       endif;
00160       if(strlen($text_part) > 0):
00161         $result[] = $doc->createTextNode($text_part);
00162       endif;
00163     endforeach;
00164     return $result;
00165   }
00166 
00167   /**
00168    *  Helper function for the xml_convert_any2elem() function.
00169    *  It splits the string into an array of <a href='http://www.php.net/manual/en/ref.dom.php'>DOMText</a>
00170    *  and <a href='http://www.php.net/manual/en/ref.dom.php'>DOMElement</a>
00171    *  (or other <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00172    *  descendant) instances.
00173    *  @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
00174    *  @param[in] $text String which is going to be converted.
00175    *  @param[in] $regex Regular expression used for splitting the string.
00176    *                    Note that if you need to use rounding brackets
00177    *                    inside of the regex you should mark them
00178    *                    <a href='http://www.php.net/manual/en/reference.pcre.pattern.syntax.php'>non-capturing (?:)</a>
00179    *                    otherwise strange things will happen.
00180    *  @param[in] $elemcreator Instance of ElementCreator class.
00181    *  @return Array of DOMNodes, there is always at least one DOMText instance.
00182    */
00183   function regex_splitter($doc, $text, $regex, $elemcreator) {
00184     $text_parts = preg_split($regex, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
00185     $result = array();
00186     foreach ($text_parts as $text_part):
00187       if (preg_match($regex, $text_part)):
00188         $result[] = $elemcreator->create_element($doc, $text_part);
00189       else:
00190         $result[] = $doc->createTextNode($text_part);
00191       endif;
00192     endforeach;
00193     return $result;
00194   }
00195 
00196   /**
00197    *  Converts string to an array of <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00198    *  descendants.
00199    *  @param[in] $doc <a href='http://www.php.net/manual/en/ref.dom.php'>DOMDocument</a> instance.
00200    *  @param[in] $text String which is going to be converted. It can be
00201    *                   also array of <a href='http://www.php.net/manual/en/ref.dom.php'>DOMNode</a>
00202    *                   instances (so you can call this function on a value
00203    *                   returned by another call to this function).
00204    *  @param[in] $str Anything (e.g. plain string or regex) which will be
00205    *                  used for splitting the text. This value will be passed
00206    *                  to the splitter function.
00207    *  @param[in] $elemcreator Instance of ElementCreator class or string
00208    *                          which will be wrapped into a ElementCreator
00209    *                          instance (the string will be used as an elements
00210    *                          name).
00211    *  @param[in] $splitter Function which will be used to split the string.
00212    *                       See str_splitter() and regex_splitter().
00213    *  @param[in] $recurse If is set to true than also text nodes which are
00214    *                      children of existing elements are processed.
00215    *                      Defaults to false.
00216    *  @return Array of DOMNodes, there is always at least one DOMText instance.
00217    */
00218   function xml_convert_any2elem($doc, $text, $str, $elemcreator, $splitter, $recurse = FALSE) {
00219     if (is_string($elemcreator)):
00220       $elemcreator = new ElementCreator($elemcreator);
00221     endif;
00222 
00223     if (is_array($text)):
00224       $result = array();
00225       foreach ($text as $part):
00226         $result = array_merge($result, xml_convert_any2elem($doc, $part, $str, $elemcreator, $splitter, $recurse));
00227       endforeach;
00228       return $result;
00229     endif;
00230     
00231     if ($text instanceof DomText):
00232       return xml_convert_any2elem($doc, $text->wholeText, $str, $elemcreator, $splitter, $recurse);
00233     endif;
00234 
00235     if ($text instanceof DomElement):
00236 
00237       // recursion means that we are converting also content of already
00238       // created elements
00239       // we will convert childnodes, result of their conversion put into
00240       // an array and then replace childnodes with it
00241       if ($recurse):
00242         $result = array();
00243         foreach ($text->childNodes as $childNode):
00244 
00245           // text nodes convert
00246           if ($childNode instanceof DomText):
00247             $result = array_merge($result, xml_convert_any2elem($doc, $childNode, $str, $elemcreator, $splitter, $recurse));
00248 
00249           // element nodes recurse
00250           elseif ($childNode instanceof DomElement):
00251             xml_convert_any2elem($doc, $childNode, $str, $elemcreator, $splitter, $recurse);
00252             $result[] = $childNode;
00253             
00254           // other nodes (e.g. attributes) copy
00255           else:
00256             assert($childNode instanceof DomNode);
00257             $result[] = $childNode;
00258           endif;
00259 
00260         endforeach;
00261         
00262         // now remove existing childnodes ...
00263         $node_list = clone $text->childNodes;
00264         foreach ($node_list as $childNode):
00265           $text->removeChild($childNode);
00266         endforeach;
00267         
00268         // ... and add new ones
00269         foreach ($result as $newChild):
00270           assert($newChild instanceof DomNode);
00271           $text->appendChild($newChild);
00272         endforeach;
00273         
00274       endif;
00275 
00276       // we have to return ourselves for a case that DomElement
00277       // is in the array. see the "if (is_array($text))" part to know why
00278       // is this needed
00279       return array($text);
00280     endif;
00281     
00282     assert(is_string($text));
00283 
00284     if (strlen($text) == 0):
00285       return array($doc->createTextNode(''));
00286     endif;
00287 
00288     return $splitter($doc, $text, $str, $elemcreator);
00289   }
00290 
00291   /**
00292    *  Shortcut for calling xml_convert_any2elem() with str_splitter().
00293    */
00294   function xml_convert_str2elem($doc, $text, $str, $elemcreator, $recurse = FALSE) {
00295     return xml_convert_any2elem($doc, $text, $str, $elemcreator, 'str_splitter', $recurse);
00296   }
00297 
00298   /**
00299    *  Shortcut for calling xml_convert_any2elem() with regex_splitter().
00300    */
00301   function xml_convert_regex2elem($doc, $text, $regex, $elemcreator, $recurse = FALSE) {
00302     return xml_convert_any2elem($doc, $text, $regex, $elemcreator, 'regex_splitter', $recurse);
00303   }
00304 
00305 ?>

Generated on Sat Sep 24 01:26:42 2005 for Easy PHP Framework by  doxygen 1.4.2