You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1080 lines
28 KiB

7 years ago
  1. <?php
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP
  6. *
  7. * This content is released under the MIT License (MIT)
  8. *
  9. * Copyright (c) 2014 - 2017, British Columbia Institute of Technology
  10. *
  11. * Permission is hereby granted, free of charge, to any person obtaining a copy
  12. * of this software and associated documentation files (the "Software"), to deal
  13. * in the Software without restriction, including without limitation the rights
  14. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. * copies of the Software, and to permit persons to whom the Software is
  16. * furnished to do so, subject to the following conditions:
  17. *
  18. * The above copyright notice and this permission notice shall be included in
  19. * all copies or substantial portions of the Software.
  20. *
  21. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. * THE SOFTWARE.
  28. *
  29. * @package CodeIgniter
  30. * @author EllisLab Dev Team
  31. * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
  32. * @copyright Copyright (c) 2014 - 2017, British Columbia Institute of Technology (http://bcit.ca/)
  33. * @license http://opensource.org/licenses/MIT MIT License
  34. * @link https://codeigniter.com
  35. * @since Version 1.0.0
  36. * @filesource
  37. */
  38. defined('BASEPATH') OR exit('No direct script access allowed');
  39. /**
  40. * Security Class
  41. *
  42. * @package CodeIgniter
  43. * @subpackage Libraries
  44. * @category Security
  45. * @author EllisLab Dev Team
  46. * @link https://codeigniter.com/user_guide/libraries/security.html
  47. */
  48. class CI_Security {
  /**
   * List of sanitize filename strings
   *
   * Substrings that sanitize_filename() strips from a file name.
   * The percent-encoded entries are matched literally (the file name
   * is not URL-decoded by sanitize_filename() itself).
   *
   * @var array
   */
  public $filename_bad_chars = array(
      '../', '<!--', '-->', '<', '>',
      "'", '"', '&', '$', '#',
      '{', '}', '[', ']', '=',
      ';', '?', '%20', '%22',
      '%3c', // <
      '%253c', // < (double URL-encoded: %25 is "%")
      '%3e', // >
      '%0e', // byte 0x0E (Shift Out control character) - NOTE(review): not ">", which is %3e above
      '%28', // (
      '%29', // )
      '%2528', // ( (double URL-encoded: %25 is "%")
      '%26', // &
      '%24', // $
      '%3f', // ?
      '%3b', // ;
      '%3d' // =
  );
  /**
   * Character set
   *
   * Default value only; the constructor replaces it with the
   * upper-cased 'charset' config item.
   *
   * @var string
   */
  public $charset = 'UTF-8';
  /**
   * XSS Hash
   *
   * Random hash used as a temporary placeholder while protecting
   * URL query-string ampersands. Generated lazily by xss_hash().
   *
   * @var string
   */
  protected $_xss_hash;
  /**
   * CSRF Hash
   *
   * Random hash for Cross Site Request Forgery protection cookie.
   * Stays NULL until _csrf_set_hash() assigns it.
   *
   * @var string
   */
  protected $_csrf_hash;
  /**
   * CSRF Expire time
   *
   * Expiration time for Cross Site Request Forgery protection cookie.
   * Defaults to two hours (in seconds); may be replaced by the
   * 'csrf_expire' config item in the constructor.
   *
   * @var int
   */
  protected $_csrf_expire = 7200;
  /**
   * CSRF Token name
   *
   * Name of the POST field that carries the CSRF token; may be
   * replaced by the 'csrf_token_name' config item in the constructor.
   *
   * @var string
   */
  protected $_csrf_token_name = 'ci_csrf_token';
  /**
   * CSRF Cookie name
   *
   * Cookie name for Cross Site Request Forgery protection cookie.
   * The constructor may replace it with the 'csrf_cookie_name' config
   * item and prepends the 'cookie_prefix' config item when one is set.
   *
   * @var string
   */
  protected $_csrf_cookie_name = 'ci_csrf_token';
  /**
   * List of never allowed strings
   *
   * Literal search => replacement pairs applied with str_replace()
   * by _do_never_allowed().
   *
   * @var array
   */
  protected $_never_allowed_str = array(
      'document.cookie' => '[removed]',
      'document.write' => '[removed]',
      '.parentNode' => '[removed]',
      '.innerHTML' => '[removed]',
      '-moz-binding' => '[removed]',
      '<!--' => '&lt;!--',
      '-->' => '--&gt;',
      '<![CDATA[' => '&lt;![CDATA[',
      '<comment>' => '&lt;comment&gt;',
      '<%' => '&lt;&#37;'
  );
  /**
   * List of never allowed regex replacements
   *
   * Pattern bodies only: _do_never_allowed() wraps each one in '#'
   * delimiters with the "is" modifiers and replaces every match
   * with '[removed]'.
   *
   * @var array
   */
  protected $_never_allowed_regex = array(
      'javascript\s*:',
      '(document|(document\.)?window)\.(location|on\w*)',
      'expression\s*(\(|&\#40;)', // CSS and IE
      'vbscript\s*:', // IE, surprise!
      'wscript\s*:', // IE
      'jscript\s*:', // IE
      'vbs\s*:', // IE
      'Redirect\s+30\d',
      // NOTE(review): inside a character class "\1" is not a back-reference,
      // so [^\1] presumably means "anything but byte 0x01" - confirm intent.
      "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
  );
  /**
   * Class constructor
   *
   * When the 'csrf_protection' config item is truthy, copies the CSRF
   * related config items onto the matching protected properties,
   * prefixes the cookie name and initialises the CSRF hash. The
   * character set is always taken from the 'charset' config item.
   *
   * @return void
   */
  public function __construct()
  {
      if (config_item('csrf_protection'))
      {
          // Each config item "foo" overrides the property "_foo",
          // but only when the item is actually configured (not NULL).
          $settings = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');
          foreach ($settings as $setting)
          {
              $configured = config_item($setting);
              if ($configured !== NULL)
              {
                  $property = '_'.$setting;
                  $this->$property = $configured;
              }
          }

          // Application specific cookie prefix goes in front of the name
          $prefix = config_item('cookie_prefix');
          if ($prefix)
          {
              $this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
          }

          $this->_csrf_set_hash();
      }

      $this->charset = strtoupper(config_item('charset'));

      log_message('info', 'Security Class Initialized');
  }
  183. // --------------------------------------------------------------------
  /**
   * CSRF Verify
   *
   * For non-POST requests this only (re)sends the CSRF cookie. For POST
   * requests it compares the submitted token against the cookie value,
   * refreshes the hash/cookie and calls csrf_show_error() on mismatch.
   * URIs matching an entry of the 'csrf_exclude_uris' config item are
   * let through without a token check.
   *
   * @return CI_Security|false	FALSE can only come from csrf_set_cookie()
   *				on the non-POST path
   */
  public function csrf_verify()
  {
      // If it's not a POST request we will set the CSRF cookie
      if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
      {
          return $this->csrf_set_cookie();
      }

      // Check if URI has been whitelisted from CSRF checks
      if ($exclude_uris = config_item('csrf_exclude_uris'))
      {
          $uri = load_class('URI', 'core');
          foreach ($exclude_uris as $excluded)
          {
              // Each entry is treated as a regular expression anchored
              // to the whole URI string
              if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
              {
                  return $this;
              }
          }
      }

      // Check CSRF token validity, but don't error on mismatch just yet - we'll want to regenerate.
      // Both values are client-controlled and may arrive as arrays (e.g. "token[]=x");
      // hash_equals() only accepts strings, so anything else is simply treated as invalid.
      $valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
          && is_string($_POST[$this->_csrf_token_name])
          && is_string($_COOKIE[$this->_csrf_cookie_name])
          && hash_equals($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name]);

      // We kill this since we're done and we don't want to pollute the _POST array
      unset($_POST[$this->_csrf_token_name]);

      // Regenerate on every submission?
      if (config_item('csrf_regenerate'))
      {
          // Nothing should last forever
          unset($_COOKIE[$this->_csrf_cookie_name]);
          $this->_csrf_hash = NULL;
      }

      $this->_csrf_set_hash();
      $this->csrf_set_cookie();

      if ($valid !== TRUE)
      {
          $this->csrf_show_error();
      }

      log_message('info', 'CSRF token verified');
      return $this;
  }
  229. // --------------------------------------------------------------------
  /**
   * CSRF Set Cookie
   *
   * Sends the current CSRF hash to the client as a cookie that expires
   * $_csrf_expire seconds from now. Nothing is sent when the
   * 'cookie_secure' config item is on but the request is not HTTPS.
   *
   * @codeCoverageIgnore
   * @return CI_Security|false	FALSE when a secure cookie is required
   *				on a non-HTTPS request
   */
  public function csrf_set_cookie()
  {
      $is_secure = (bool) config_item('cookie_secure');

      // A "secure" cookie must never travel over plain HTTP
      if ($is_secure && ! is_https())
      {
          return FALSE;
      }

      $expires_at = time() + $this->_csrf_expire;

      setcookie(
          $this->_csrf_cookie_name,
          $this->_csrf_hash,
          $expires_at,
          config_item('cookie_path'),
          config_item('cookie_domain'),
          $is_secure,
          config_item('cookie_httponly')
      );

      log_message('info', 'CSRF cookie sent');

      return $this;
  }
  256. // --------------------------------------------------------------------
  /**
   * Show CSRF Error
   *
   * Reports a failed CSRF check by calling show_error() with a
   * generic message and HTTP status 403. Kept as a separate public
   * method so the response can be customised by overriding it.
   *
   * @return void
   */
  public function csrf_show_error()
  {
      show_error('The action you have requested is not allowed.', 403);
  }
  266. // --------------------------------------------------------------------
  /**
   * Get CSRF Hash
   *
   * Returns the value currently stored in $_csrf_hash; this is NULL
   * if no hash has been set yet.
   *
   * @see CI_Security::$_csrf_hash
   * @return string CSRF hash
   */
  public function get_csrf_hash()
  {
      return $this->_csrf_hash;
  }
  277. // --------------------------------------------------------------------
  /**
   * Get CSRF Token Name
   *
   * Returns the name of the POST field that csrf_verify() reads the
   * submitted token from.
   *
   * @see CI_Security::$_csrf_token_name
   * @return string CSRF token name
   */
  public function get_csrf_token_name()
  {
      return $this->_csrf_token_name;
  }
  288. // --------------------------------------------------------------------
  /**
   * XSS Clean
   *
   * Sanitizes data so that Cross Site Scripting Hacks can be
   * prevented. This method does a fair amount of work but
   * it is extremely thorough, designed to prevent even the
   * most obscure XSS attempts. Nothing is ever 100% foolproof,
   * of course, but I haven't been able to get anything passed
   * the filter.
   *
   * Note: Should only be used to deal with data upon submission.
   * It's not something that should be used for general
   * runtime processing.
   *
   * Arrays are cleaned element by element (recursively); note that
   * $is_image is not forwarded to the elements.
   *
   * @link http://channel.bitflux.ch/wiki/XSS_Prevention
   * Based in part on some code and ideas from Bitflux.
   *
   * @link http://ha.ckers.org/xss.html
   * To help develop this script I used this great list of
   * vulnerabilities along with a few other hacks I've
   * harvested from examining vulnerabilities in other programs.
   *
   * @param string|string[] $str Input data
   * @param bool $is_image Whether the input is an image
   * @return string|string[]|bool	Cleaned data, or for $is_image === TRUE
   *				a boolean telling whether the input was clean
   */
  public function xss_clean($str, $is_image = FALSE)
  {
      // Is the string an array?
      if (is_array($str))
      {
          // Iterate by value: every slot is overwritten through its key,
          // so no reference binding is needed (and none is left dangling).
          foreach ($str as $key => $value)
          {
              $str[$key] = $this->xss_clean($value);
          }

          return $str;
      }

      // Remove Invisible Characters
      $str = remove_invisible_characters($str);

      /*
       * URL Decode
       *
       * Just in case stuff like this is submitted:
       *
       * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
       *
       * Note: Use rawurldecode() so it does not remove plus signs
       */
      if (strpos($str, '%') !== FALSE)
      {
          // Decode repeatedly so that multiply-encoded input is fully unwrapped
          do
          {
              $oldstr = $str;
              $str = rawurldecode($str);
              $str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
          }
          while ($oldstr !== $str);
          unset($oldstr);
      }

      /*
       * Convert character entities to ASCII
       *
       * This permits our tests below to work reliably.
       * We only convert entities that are within tags since
       * these are the ones that will pose security problems.
       */
      $str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
      $str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

      // Remove Invisible Characters Again!
      $str = remove_invisible_characters($str);

      /*
       * Convert all tabs to spaces
       *
       * This prevents strings like this: ja	vascript
       * NOTE: we deal with spaces between characters later.
       * NOTE: preg_replace was found to be amazingly slow here on
       * large blocks of data, so we use str_replace.
       */
      $str = str_replace("\t", ' ', $str);

      // Capture converted string for later comparison
      $converted_string = $str;

      // Remove Strings that are never allowed
      $str = $this->_do_never_allowed($str);

      /*
       * Makes PHP tags safe
       *
       * Note: XML tags are inadvertently replaced too:
       *
       * <?xml
       *
       * But it doesn't seem to pose a problem.
       */
      if ($is_image === TRUE)
      {
          // Images have a tendency to have the PHP short opening and
          // closing tags every so often so we skip those and only
          // do the long opening tags.
          $str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
      }
      else
      {
          $str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
      }

      /*
       * Compact any exploded words
       *
       * This corrects words like: j a v a s c r i p t
       * These words are compacted back to their correct state.
       */
      $words = array(
          'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
          'vbs', 'script', 'base64', 'applet', 'alert', 'document',
          'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
      );

      foreach ($words as $word)
      {
          $word = implode('\s*', str_split($word)).'\s*';

          // We only want to do this when it is followed by a non-word character
          // That way valid stuff like "dealer to" does not become "dealerto"
          // (substr(..., 0, -3) drops the trailing '\s*' again)
          $str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
      }

      /*
       * Remove disallowed Javascript in links or img tags
       * We used to do some version comparisons and use of stripos(),
       * but it is dog slow compared to these simplified non-capturing
       * preg_match(), especially if the pattern exists in the string
       *
       * Note: It was reported that not only space characters, but all in
       * the following pattern can be parsed as separators between a tag name
       * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
       * ... however, remove_invisible_characters() above already strips the
       * hex-encoded ones, so we'll skip them below.
       */
      do
      {
          $original = $str;

          if (preg_match('/<a/i', $str))
          {
              $str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
          }

          if (preg_match('/<img/i', $str))
          {
              $str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
          }

          if (preg_match('/script|xss/i', $str))
          {
              $str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
          }
      }
      while ($original !== $str);
      unset($original);

      /*
       * Sanitize naughty HTML elements
       *
       * If a tag containing any of the words in the list
       * below is found, the tag gets converted to entities.
       *
       * So this: <blink>
       * Becomes: &lt;blink&gt;
       */
      $pattern = '#'
          .'<((?<slash>/*\s*)((?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
          .'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
          // optional attributes
          .'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
          .'[^\s\042\047>/=]+' // attribute characters
          // optional attribute-value
          .'(?:\s*=' // attribute-value separator
          .'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
          .')?' // end optional attribute-value group
          .')*)' // end optional attributes group
          .'[^>]*)(?<closeTag>\>)?#isS';

      // Note: It would be nice to optimize this for speed, BUT
      // only matching the naughty elements here results in
      // false positives and in turn - vulnerabilities!
      do
      {
          $old_str = $str;
          $str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
      }
      while ($old_str !== $str);
      unset($old_str);

      /*
       * Sanitize naughty scripting elements
       *
       * Similar to above, only instead of looking for
       * tags it looks for PHP and JavaScript commands
       * that are disallowed. Rather than removing the
       * code, it simply converts the parenthesis to entities
       * rendering the code un-executable.
       *
       * For example: eval('some code')
       * Becomes: eval&#40;'some code'&#41;
       */
      $str = preg_replace(
          '#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
          '\\1\\2&#40;\\3&#41;',
          $str
      );

      // Final clean up
      // This adds a bit of extra precaution in case
      // something got through the above filters
      $str = $this->_do_never_allowed($str);

      /*
       * Images are Handled in a Special Way
       * - Essentially, we want to know that after all of the character
       * conversion is done whether any unwanted, likely XSS, code was found.
       * If not, we return TRUE, as the image is clean.
       * However, if the string post-conversion does not match the
       * string post-removal of XSS, then it fails, as there was unwanted XSS
       * code found and removed/changed during processing.
       */
      if ($is_image === TRUE)
      {
          return ($str === $converted_string);
      }

      return $str;
  }
  507. // --------------------------------------------------------------------
  /**
   * XSS Hash
   *
   * Lazily creates the XSS hash on first use and returns the same
   * value on every later call. The hash is the hex form of 16 random
   * bytes, or an md5() based value when no random bytes are available.
   *
   * @see CI_Security::$_xss_hash
   * @return string XSS hash
   */
  public function xss_hash()
  {
      // Already generated - reuse it
      if ($this->_xss_hash !== NULL)
      {
          return $this->_xss_hash;
      }

      $bytes = $this->get_random_bytes(16);

      if ($bytes === FALSE)
      {
          $this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
      }
      else
      {
          $this->_xss_hash = bin2hex($bytes);
      }

      return $this->_xss_hash;
  }
  527. // --------------------------------------------------------------------
  /**
   * Get random bytes
   *
   * Tries, in order: random_bytes(), mcrypt_create_iv(), reading
   * /dev/urandom and openssl_random_pseudo_bytes(). When random_bytes()
   * exists it is the only source used - a failure there is logged and
   * no fallback is attempted.
   *
   * @param int $length Output length (positive, digits only)
   * @return string|false	Random bytes, or FALSE on invalid length or
   *			when no source could deliver
   */
  public function get_random_bytes($length)
  {
      // Reject 0, '', NULL and anything that is not purely digits
      $is_valid_length = ( ! empty($length) && ctype_digit((string) $length));
      if ( ! $is_valid_length)
      {
          return FALSE;
      }

      if (function_exists('random_bytes'))
      {
          try
          {
              // The cast is required to avoid TypeError
              return random_bytes((int) $length);
          }
          catch (Exception $e)
          {
              // If random_bytes() can't do the job, we can't either ...
              // There's no point in using fallbacks.
              log_message('error', $e->getMessage());
              return FALSE;
          }
      }

      // Unfortunately, none of the following PRNGs is guaranteed to exist ...
      if (defined('MCRYPT_DEV_URANDOM'))
      {
          $bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
          if ($bytes !== FALSE)
          {
              return $bytes;
          }
      }

      if (is_readable('/dev/urandom'))
      {
          $handle = fopen('/dev/urandom', 'rb');
          if ($handle !== FALSE)
          {
              // Try not to waste entropy ...
              if (is_php('5.4'))
              {
                  stream_set_chunk_size($handle, $length);
              }

              $bytes = fread($handle, $length);
              fclose($handle);

              if ($bytes !== FALSE)
              {
                  return $bytes;
              }
          }
      }

      return function_exists('openssl_random_pseudo_bytes')
          ? openssl_random_pseudo_bytes($length)
          : FALSE;
  }
  577. // --------------------------------------------------------------------
  /**
   * HTML Entities Decode
   *
   * A replacement for html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Decoding is repeated until the string stops changing, so nested
   * encodings (e.g. "&amp;lt;") are fully unwrapped.
   *
   * @link http://php.net/html-entity-decode
   *
   * @param string $str Input
   * @param string $charset Character set (defaults to $this->charset)
   * @return string
   */
  public function entity_decode($str, $charset = NULL)
  {
      // Nothing that could be an entity - return the input untouched
      if (strpos($str, '&') === FALSE)
      {
          return $str;
      }

      // Entity table, built once and shared by all later calls.
      // NOTE(review): it is cached with the charset of the first call
      // only - confirm that callers never vary $charset.
      static $_entities;

      isset($charset) OR $charset = $this->charset;
      $flag = is_php('5.4')
          ? ENT_COMPAT | ENT_HTML5
          : ENT_COMPAT;

      if ( ! isset($_entities))
      {
          // char => lower-cased entity name (e.g. '<' => '&lt;')
          $_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

          // If we're not on PHP 5.4+, add the possibly dangerous HTML 5
          // entities to the array manually
          if ($flag === ENT_COMPAT)
          {
              $_entities[':'] = '&colon;';
              $_entities['('] = '&lpar;';
              $_entities[')'] = '&rpar;';
              $_entities["\n"] = '&NewLine;';
              $_entities["\t"] = '&Tab;';
          }
      }

      do
      {
          $str_compare = $str;

          // Decode standard entities, avoiding false positives
          // (named entities that are NOT terminated by a semicolon)
          if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
          {
              $replace = array();
              $matches = array_unique(array_map('strtolower', $matches[0]));
              // NOTE(review): $match is bound by reference but never written
              // through and never unset - a by-value loop looks sufficient.
              foreach ($matches as &$match)
              {
                  if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
                  {
                      $replace[$match] = $char;
                  }
              }

              // NOTE(review): the keys were lower-cased above while str_replace()
              // is case-sensitive, so mixed-case occurrences such as "&LT"
              // presumably stay undecoded here - confirm whether that is intended.
              $str = str_replace(array_keys($replace), array_values($replace), $str);
          }

          // Decode numeric & UTF16 two byte entities
          // (the preg_replace() first appends the missing ';' to numeric entities)
          $str = html_entity_decode(
              preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
              $flag,
              $charset
          );

          // Without ENT_HTML5 the manually added entities must be replaced by hand
          if ($flag === ENT_COMPAT)
          {
              $str = str_replace(array_values($_entities), array_keys($_entities), $str);
          }
      }
      while ($str_compare !== $str);
      return $str;
  }
  651. // --------------------------------------------------------------------
  /**
   * Sanitize Filename
   *
   * Strips every entry of $filename_bad_chars from the name, repeating
   * until nothing more can be removed so that removals cannot re-form
   * a forbidden sequence. Unless $relative_path is set, './' and '/'
   * are stripped as well. Backslashes are un-quoted on the way out.
   *
   * @param string $str Input file name
   * @param bool $relative_path Whether to preserve paths
   * @return string
   */
  public function sanitize_filename($str, $relative_path = FALSE)
  {
      $forbidden = $relative_path
          ? $this->filename_bad_chars
          : array_merge($this->filename_bad_chars, array('./', '/'));

      $clean = remove_invisible_characters($str, FALSE);

      do
      {
          $before = $clean;
          $clean = str_replace($forbidden, '', $before);
      }
      while ($before !== $clean);

      return stripslashes($clean);
  }
  676. // ----------------------------------------------------------------
  /**
   * Strip Image Tags
   *
   * Replaces each <img> tag with the bare value of its src attribute.
   * Quoted src values are handled first, unquoted ones second.
   *
   * @param string $str
   * @return string
   */
  public function strip_image_tags($str)
  {
      // src="..." or src='...'
      $quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';
      // src=value (no quotes)
      $bare_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

      // In both patterns the src value is capture group 2
      return preg_replace(array($quoted_src, $bare_src), '\\2', $str);
  }
  694. // ----------------------------------------------------------------
  /**
   * URL-decode taking spaces into account
   *
   * Callback for xss_clean(): when a percent-sequence was split up
   * with whitespace (e.g. "%3 c"), the whitespace is removed and the
   * result URL-decoded. Sequences without whitespace are returned
   * exactly as matched.
   *
   * @see https://github.com/bcit-ci/CodeIgniter/issues/4877
   * @param array $matches
   * @return string
   */
  protected function _urldecodespaces($matches)
  {
      $matched = $matches[0];
      $compacted = preg_replace('#\s+#', '', $matched);

      if ($compacted === $matched)
      {
          return $matched;
      }

      return rawurldecode($compacted);
  }
  710. // ----------------------------------------------------------------
  /**
   * Compact Exploded Words
   *
   * Callback method for xss_clean(): squeezes all whitespace out of
   * the matched word (group 1, e.g. 'j a v a s c r i p t') and puts
   * the non-word character that followed it (group 2) back unchanged.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $matches
   * @return string
   */
  protected function _compact_exploded_words($matches)
  {
      $compacted_word = preg_replace('/\s+/s', '', $matches[1]);

      return $compacted_word.$matches[2];
  }
  725. // --------------------------------------------------------------------
  /**
   * Sanitize Naughty HTML
   *
   * Callback method for xss_clean() to remove naughty HTML elements.
   *
   * Handles one matched tag at a time:
   *  - a tag without a closing '>' gets its '<' escaped;
   *  - a tag whose name is in the blacklist is escaped entirely;
   *  - any other tag is rebuilt from its name and the attributes that
   *    survive the "evil attribute" filter.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $matches	Named groups: slash, tagName, attributes, closeTag
   * @return string
   */
  protected function _sanitize_naughty_html($matches)
  {
      // Tag names that are always converted to entities
      static $naughty_tags = array(
          'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
          'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
          'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
          'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
      );

      // Attribute names (regex fragments) that are replaced with "xss=removed"
      static $evil_attributes = array(
          'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
      );

      // First, escape unclosed tags
      if (empty($matches['closeTag']))
      {
          return '&lt;'.$matches[1];
      }
      // Is the element that we caught naughty? If so, escape it
      elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
      {
          return '&lt;'.$matches[1].'&gt;';
      }
      // For other tags, see if their attributes are "evil" and strip those
      elseif (isset($matches['attributes']))
      {
          // We'll store the already filtered attributes here
          $attributes = array();

          // Attribute-catching pattern
          $attributes_pattern = '#'
              .'(?<name>[^\s\042\047>/=]+)' // attribute characters
              // attribute-value (required here, unlike in the tag pattern of xss_clean())
              .'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
              .'#i';

          // Blacklist pattern for evil attribute names
          $is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

          // Each iteration filters a single attribute
          do
          {
              // Strip any non-alpha characters that may precede an attribute.
              // Browsers often parse these incorrectly and that has been the
              // cause of numerous XSS issues we've had.
              $matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);

              if ( ! preg_match($attributes_pattern, $matches['attributes'], $attribute, PREG_OFFSET_CAPTURE))
              {
                  // No (valid) attribute found? Discard everything else inside the tag
                  break;
              }

              if (
                  // Is it indeed an "evil" attribute?
                  preg_match($is_evil_pattern, $attribute['name'][0])
                  // Or does it have an equals sign, but no value and not quoted? Strip that too!
                  OR (trim($attribute['value'][0]) === '')
              )
              {
                  $attributes[] = 'xss=removed';
              }
              else
              {
                  $attributes[] = $attribute[0][0];
              }

              // Continue right after the attribute that was just handled
              // ([0][1] is its byte offset thanks to PREG_OFFSET_CAPTURE)
              $matches['attributes'] = substr($matches['attributes'], $attribute[0][1] + strlen($attribute[0][0]));
          }
          while ($matches['attributes'] !== '');

          $attributes = empty($attributes)
              ? ''
              : ' '.implode(' ', $attributes);
          return '<'.$matches['slash'].$matches['tagName'].$attributes.'>';
      }

      return $matches[0];
  }
  804. // --------------------------------------------------------------------
  /**
   * JS Link Removal
   *
   * Callback method for xss_clean() to sanitize links.
   *
   * The tag's attribute string (group 1) is normalised through
   * _filter_attributes(), any href leading up to a script-like token
   * is cut out of it, and the result is put back into the matched tag.
   *
   * This limits the PCRE backtracks, making it more performance friendly
   * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
   * PHP 5.2+ on link-heavy strings.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _js_link_removal($match)
  {
      $raw_attributes = $match[1];

      $safe_attributes = preg_replace(
          '#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
          '',
          $this->_filter_attributes($raw_attributes)
      );

      return str_replace($raw_attributes, $safe_attributes, $match[0]);
  }
  830. // --------------------------------------------------------------------
  /**
   * JS Image Removal
   *
   * Callback method for xss_clean() to sanitize image tags.
   *
   * The tag's attribute string (group 1) is normalised through
   * _filter_attributes(), any src leading up to a script-like token
   * is cut out of it, and the result is put back into the matched tag.
   *
   * This limits the PCRE backtracks, making it more performance friendly
   * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
   * PHP 5.2+ on image tag heavy strings.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _js_img_removal($match)
  {
      $raw_attributes = $match[1];

      $safe_attributes = preg_replace(
          '#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
          '',
          $this->_filter_attributes($raw_attributes)
      );

      return str_replace($raw_attributes, $safe_attributes, $match[0]);
  }
  856. // --------------------------------------------------------------------
  /**
   * Attribute Conversion
   *
   * Callback for xss_clean(): inside a matched attribute, turns angle
   * brackets into entities and doubles every backslash.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _convert_attribute($match)
  {
      $escapes = array(
          '>' => '&gt;',
          '<' => '&lt;',
          '\\' => '\\\\'
      );

      return strtr($match[0], $escapes);
  }
  868. // --------------------------------------------------------------------
  /**
   * Filter Attributes
   *
   * Collects every quoted name="value" / name='value' pair found in
   * the input, removes C-style comments from each and returns the
   * pairs concatenated. Anything that is not such a pair is dropped;
   * an input without any pair yields an empty string.
   *
   * @used-by CI_Security::_js_img_removal()
   * @used-by CI_Security::_js_link_removal()
   * @param string $str
   * @return string
   */
  protected function _filter_attributes($str)
  {
      $found = preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $pairs);
      if ( ! $found)
      {
          return '';
      }

      $filtered = '';
      foreach ($pairs[0] as $pair)
      {
          $filtered .= preg_replace('#/\*.*?\*/#s', '', $pair);
      }

      return $filtered;
  }
  891. // --------------------------------------------------------------------
  /**
   * HTML Entity Decode Callback
   *
   * Decodes the entities of a matched tag while keeping the "&" of
   * URL query-string pairs (&name=value) intact: those ampersands are
   * swapped for the XSS hash before decoding and restored afterwards.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _decode_entity($match)
  {
      $placeholder = $this->xss_hash();

      // Protect GET variables in URLs
      // 901119URL5918AMP18930PROTECT8198
      $shielded = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $placeholder.'\\1=\\2', $match[0]);

      $decoded = $this->entity_decode($shielded, $this->charset);

      // Un-protect URL GET vars
      return str_replace($placeholder, '&', $decoded);
  }
  911. // --------------------------------------------------------------------
  /**
   * Do Never Allowed
   *
   * Applies the literal replacements of $_never_allowed_str first,
   * then replaces every match of each $_never_allowed_regex entry
   * with '[removed]'.
   *
   * @used-by CI_Security::xss_clean()
   * @param string
   * @return string
   */
  protected function _do_never_allowed($str)
  {
      $needles = array_keys($this->_never_allowed_str);
      $substitutes = array_values($this->_never_allowed_str);
      $str = str_replace($needles, $substitutes, $str);

      foreach ($this->_never_allowed_regex as $expression)
      {
          $pattern = '#'.$expression.'#is';
          $str = preg_replace($pattern, '[removed]', $str);
      }

      return $str;
  }
  928. // --------------------------------------------------------------------
  /**
   * Set CSRF Hash
   *
   * Makes sure $_csrf_hash holds a value and returns it. An existing
   * hash is kept; otherwise a well-formed cookie value (32 hex
   * characters) is adopted, and only as a last step a new random
   * hash is generated. No cookie is sent from here.
   *
   * @return string
   */
  protected function _csrf_set_hash()
  {
      if ($this->_csrf_hash !== NULL)
      {
          return $this->_csrf_hash;
      }

      $cookie_name = $this->_csrf_cookie_name;

      // If the cookie exists we will use its value.
      // We don't necessarily want to regenerate it with
      // each page load since a page could contain embedded
      // sub-pages causing this feature to fail
      if (isset($_COOKIE[$cookie_name]) && is_string($_COOKIE[$cookie_name])
          && preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1)
      {
          $this->_csrf_hash = $_COOKIE[$cookie_name];
          return $this->_csrf_hash;
      }

      $bytes = $this->get_random_bytes(16);

      if ($bytes === FALSE)
      {
          $this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
      }
      else
      {
          $this->_csrf_hash = bin2hex($bytes);
      }

      return $this->_csrf_hash;
  }
  954. }