phpgroupware-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Phpgroupware-cvs] phpgwapi/inc functions.inc.php, 1.121.2.13.2.23 class


From: skwashd
Subject: [Phpgroupware-cvs] phpgwapi/inc functions.inc.php, 1.121.2.13.2.23 class.data_cleaner.inc.php, 1.1.2.1
Date: Wed, 24 Aug 2005 15:46:00 +0200

Update of phpgwapi/inc

Modified Files:
     Branch: Version-0_9_16-branch
            functions.inc.php lines: +27 -1
Added Files:
     Branch: Version-0_9_16-branch
            class.data_cleaner.inc.php lines: +254 -0

Log Message:
XSS fix

====================================================
Index: phpgwapi/inc/functions.inc.php
diff -u phpgwapi/inc/functions.inc.php:1.121.2.13.2.22 
phpgwapi/inc/functions.inc.php:1.121.2.13.2.23
--- phpgwapi/inc/functions.inc.php:1.121.2.13.2.22      Sat Nov  6 15:34:26 2004
+++ phpgwapi/inc/functions.inc.php      Wed Aug 24 13:46:44 2005
@@ -116,6 +116,32 @@
 // Can't use this yet - errorlog hasn't been created.
 //     print_debug('domain',@$GLOBALS['phpgw_info']['user']['domain'],'api');

+       // XSS protection: remove this and you will lose important parts of your anatomy - skwashd
+       $GLOBALS['RAW_REQUEST'] = $_REQUEST; // if you really need the raw value
+       $to_cleans = array('_GET', '_POST', '_COOKIE', '_REQUEST');
+       $data_cleaner = createObject('phpgwapi.data_cleaner', '');
+       foreach ( $to_cleans as $to_clean )
+       {
+               if ( isset($GLOBALS[$to_clean]) && is_array($GLOBALS[$to_clean]) && count($GLOBALS[$to_clean]) )
+               {
+                       foreach ( $GLOBALS[$to_clean] as $key => $val )
+                       {
+                               if ( !is_array($val) )
+                               {
+                                       $GLOBALS[$to_clean][$key] = $data_cleaner->clean($val);
+                               }
+                               else
+                               {
+                                       foreach ( $val as $skey => $sval ) // only one level of nesting is cleaned
+                                       {
+                                               $GLOBALS[$to_clean][$key][$skey] = $data_cleaner->clean($sval); // clean the element, not the parent array
+                                       }
+                               }
+                       }
+                       $GLOBALS["HTTP{$to_clean}_VARS"] = $GLOBALS[$to_clean]; // copy the cleaned values into the matching HTTP_*_VARS global
+               }
+       }
+
         
/****************************************************************************\
         * These lines load up the API, fill up the $phpgw_info array, etc      
      *
         
\****************************************************************************/

====================================================
Index: class.data_cleaner.inc.php
<?php
/**
 * HTML Sanitizer, attempts to make variables safe for users.
 * $Id: class.data_cleaner.inc.php,v 1.1.2.1 2005/08/24 13:46:44 skwashd Exp $
 *
 * Taken from the horde project by Dave Hall for use in phpGroupWare
 *
 * Copyright 1999-2005 Anil Madhavapeddy <address@hidden>
 * Copyright 1999-2005 Jon Parise <address@hidden>
 * Copyright 2002-2005 Michael Slusarz <address@hidden>
 * Portions Copyright 2005 Free Software Foundation Inc http://fsf.org
 *
 * See the enclosed file COPYING for license information (GPL). If you
 * did not receive this file, see http://www.fsf.org/copyleft/gpl.html.
 *
 * @author  Anil Madhavapeddy <address@hidden>
 * @author  Jon Parise <address@hidden>
 * @author  Michael Slusarz <address@hidden>
 * @author  Dave Hall skwashd at phpgroupware.org
 * @since   phpGroupWare 0.9.16.007
 * @package API
 */
class data_cleaner
{
        /**
        * @var string $data the data cleaned by clean() when it is called without an argument
        */
        var $data;

        /**
        * @constructor
        *
        * @param string $data the data to be cleaned
        */
        function __construct($data = '')
        {
                // Must be stored in $data: that is the property clean() falls back to.
                $this->data = $data;
        }

        /**
        * PHP 4 style constructor, kept so the class still initialises on
        * interpreters that do not know __construct().
        *
        * @param string $data the data to be cleaned
        */
        function data_cleaner($data = '')
        {
                $this->__construct($data);
        }

        /**
         * Render out the currently set contents.
         *
         * @param String $data the raw data; when omitted (null) the value
         *                     given to the constructor is cleaned instead.
         *
         * @return string  The cleaned data.
         */
        function clean($data = null)
        {
                if ( is_null($data) )
                {
                        return $this->_clean_data($this->data);
                }
                return $this->_clean_data($data);
        }

        /**
         * Build a capturing regex group that matches one letter either
         * literally or written as a decimal / hexadecimal numeric character
         * reference of its upper or lower case form (semicolon optional).
         * The patterns are meant to be used with the /i modifier.
         *
         * @access private
         *
         * @param string $char  A single ASCII letter.
         *
         * @return string  The regex fragment, e.g. for "s":
         *                 (s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)
         */
        function _entity_char($char)
        {
                $upper = ord(strtoupper($char));
                $lower = ord(strtolower($char));

                return '(' . $char .
                        '|&#0*' . $upper . ';?' .
                        '|&#x0*' . dechex($upper) . ';?' .
                        '|&#0*' . $lower . ';?' .
                        '|&#x0*' . dechex($lower) . ';?)';
        }

        /**
         * Build a regex fragment matching a whole word letter by letter,
         * using _entity_char() for every letter.
         *
         * @access private
         *
         * @param string $word       The word to match (ASCII letters only).
         * @param string $separator  Regex fragment appended after every
         *                           letter, including the last one.
         *
         * @return string  The regex fragment (one capturing group per letter).
         */
        function _entity_word($word, $separator = '')
        {
                $pattern = '';
                $length = strlen($word);
                for ($i = 0; $i < $length; $i++)
                {
                        $pattern .= $this->_entity_char(substr($word, $i, 1)) . $separator;
                }
                return $pattern;
        }

        /**
         * preg_replace_callback() handler for href attributes: in-page
         * anchors (targets starting with "#") are kept, every other target
         * is passed through $GLOBALS['phpgw']->safe_redirect().
         *
         * @access private
         *
         * @param array $m  Match array; $m[2] holds the link target.
         *
         * @return string  The rewritten href attribute, always double quoted.
         */
        function _derefer_href($m)
        {
                $target = $m[2];
                if (strlen($target) && substr($target, 0, 1) == '#')
                {
                        return 'href="' . $target . '"';
                }
                return 'href="' . $GLOBALS['phpgw']->safe_redirect($target) . '"';
        }

        /**
         * These regular expressions attempt to make HTML safe for
         * viewing. THEY ARE NOT PERFECT.
         *
         * The argument is taken by reference and is overwritten with the
         * cleaned value as a side effect.
         *
         * @access private
         *
         * @param string $data  The HTML data.
         *
         * @return string  The cleaned HTML data.
         */
        function _clean_data(&$data)
        {
                /* Deal with <base> tags in the HTML, since they will screw up
                 * our own relative paths. */
                if (($i = stristr($data, '<base ')) && ($i = stristr($i, 'http')) &&
                                ($j = strchr($i, '>')))
                {
                        $base = substr($i, 0, strlen($i) - strlen($j));
                        $base = preg_replace('|(http.*://[^/]*/?).*|i', '\1', $base);

                        if (substr($base, -1) != '/')
                        {
                                $base .= '/';
                        }

                        /* Recursively call this->_clean_data() to prevent clever fiends
                         * from sneaking nasty things into the page via $base. */
                        $base = $this->_clean_data($base);
                }

                /* Removes HTML comments (including some scripts & styles). */
                $data = preg_replace('/<!--.*?-->/s', '', $data);

                /* Change space entities to space characters. */
                $data = preg_replace('/&#(x0*20|0*32);?/i', ' ', $data);

                /* Nuke non-printable characters (a play in three acts). */

                /* Rule 1). If we have a semicolon, it is deterministically
                 * detectable and fixable, without introducing collateral
                 * damage. */
                $data = preg_replace('/&#x?0*([9A-D]|1[0-3]);/i', '&nbsp;', $data);

                /* Rule 2). Hex numbers (usually having an x prefix) are also
                 * deterministic, even if we don't have the semi. Note that
                 * some browsers will treat &#a or &#0a as a hex number even
                 * without the x prefix; hence /x?/ which will cover those
                 * cases in this rule. */
                $data = preg_replace('/&#x?0*[9A-D]([^0-9A-F]|$)/i', '&nbsp\\1', $data);

                /* Rule 3). Decimal numbers without trailing semicolons. The
                 * problem is that some browsers will interpret &#10a as
                 * "\na", some as "&#x10a" so we have to clean the &#10 to be
                 * safe for the "\na" case at the expense of mangling a valid
                 * entity in other cases. (Solution for valid HTML authors:
                 * always use the semicolon.) */
                $data = preg_replace('/&#0*(9|1[0-3])([^0-9]|$)/i', '&nbsp\\2', $data);

                /* Remove overly long numeric entities. */
                $data = preg_replace('/&#x?0*[0-9A-F]{6,};?/i', '&nbsp;', $data);

                /* Remove everything outside of and including the <body> tag.
                 * This used to be guarded by an $attachment flag that was never
                 * defined in this method, so the stripping has always run. */
                $data = preg_replace('/.*<body[^>]*>/si', '', $data);
                $data = preg_replace('/<\/body>.*/si', '', $data);

                /* Get all attribute="javascript:foo()" tags. This is
                 * essentially the regex /(=|url\()("?)[^>]*script:/ but
                 * expanded to catch camouflage with spaces and entities.
                 * Group 1 is the "=" or "url(" prefix, group 8 the optional
                 * opening quote; both are kept by the replacement. */
                $preg = '/((&#0*61;?|&#x0*3D;?|=)|' .
                        '(' . $this->_entity_word('url', '\s*') . '(\()))\s*' .
                        '(&#0*34;?|&#x0*22;?|"|&#0*39;?|&#x0*27;?|\')?' .
                        '[^>]*\s*' .
                        $this->_entity_word('script', '\s*') .
                        '(:|&#0*58;?|&#x0*3a;?)/i';
                $data = preg_replace($preg, '\1\8VarCleaned', $data);

                /* Get all on<foo>="bar()". NEVER allow these. */
                $data = preg_replace('/([\s"\']+' .
                                        $this->_entity_word('on') .
                                        '\w+)\s*=/i', '\1VarCleaned=', $data);

                /* Remove all scripts since they might introduce garbage if
                 * they are not quoted properly. */
                $data = preg_replace('|<script[^>]*>.*?</script>|is', '<VarCleaned_script />', $data);

                /* Get all tags that might cause trouble - <object>, <embed>,
                 * <base>, etc. Meta refreshes and iframes, too. The patterns
                 * are generated so that every letter consistently accepts its
                 * decimal and hexadecimal entity forms. */
                $tags = array('script', 'embed', 'xml', 'base', 'meta', 'java', 'object', 'iframe');
                $malicious = array();
                foreach ($tags as $tag)
                {
                        $pattern = '/<([^>a-z]*)' . $this->_entity_word($tag, '\s*');
                        if ($tag == 'base')
                        {
                                /* NOTE(review): suffix kept from the original pattern; it
                                 * requires one more character that is none of l, i, n, e.
                                 * The intent is not documented - confirm before changing. */
                                $pattern .= '[^line]';
                        }
                        $malicious[] = $pattern . '/i';
                }

                $data = preg_replace($malicious, '<VarCleaned_tag', $data);

                /* Comment out style/link tags. */
                $pattern = array('/\s+style\s*=/i',
                                '|<style[^>]*>(?:\s*<\!--)*|i',
                                '|(?:-->\s*)*</style>|i',
                                '|(<link[^>]*>)|i');
                $replace = array(' VarCleaned=',
                                '<!--',
                                '-->',
                                '<!-- $1 -->');
                $data = preg_replace($pattern, $replace, $data);

                /* A few other matches.
                 * NOTE(review): the first replacement refers to group 3 although
                 * its pattern only has two groups, so the tag content is dropped;
                 * kept as in the original - confirm whether that is intended. */
                $pattern = array('|<([^>]*)&{.*}([^>]*)>|',
                                '|<([^>]*)mocha:([^>]*)>|i',
                                '|<([^>]*)binding:([^>]*)>|i');
                $replace = array('<&{;}\3>',
                                '<\1VarCleaned:\2>',
                                '<\1VarCleaned:\2>');
                $data = preg_replace($pattern, $replace, $data);

                /* Attempt to fix paths that were relying on a <base> tag. */
                if (!empty($base))
                {
                        $pattern = array('|src=(["\'])/|i',
                                        '|src=[^\'"]/|i',
                                        '|href= *(["\'])/|i',
                                        '|href= *[^\'"]/|i');
                        $replace = array('src=\1' . $base,
                                        'src=' . $base,
                                        'href=\1' . $base,
                                        'href=' . $base);
                        $data = preg_replace($pattern, $replace, $data);
                }

                /* Try to derefer all external references. A method callback is
                 * used instead of create_function(), which no longer exists in
                 * current PHP versions. */
                $data = preg_replace_callback('/href\s*=\s*(["\'])?((?(1)[^\1]*?|[^\s]+))(?(1)\1|)/i',
                                array($this, '_derefer_href'),
                                $data);

                return $data;
        }
}
?>






reply via email to

[Prev in Thread] Current Thread [Next in Thread]