<?php
!function_exists('readover') && exit('Forbidden');
/**
 * Sensitive-word filtering utility.
 *
 * Two mechanisms are visible in this class:
 *  - regex word lists (forbidden / replace / alarm) loaded from the wordsfb
 *    cache, used by comprise(), convert() and alarm();
 *  - a serialized Trie dictionary whose hits are scored by Bayes, used by
 *    paraseContent() and getFilterResult().
 * It also maintains rows of the pw_filter table and the `ifwordsfb` flag of
 * the content tables listed in tablestruct().
 *
 * @package Filter
 */
class PW_FilterUtil {
	/**
	 * Path of the serialized (binary) dictionary file.
	 * @var string
	 */
	var $dict_bin_path;
	/**
	 * Directory that holds the dictionary files.
	 * @var string
	 */
	var $dict_dir;
	/**
	 * Path of the plain-text dictionary source file.
	 * @var string
	 */
	var $dict_source_path;
	/**
	 * Path of the wordsfb cache file (only set by the default constructor branch).
	 * @var string
	 */
	var $dict_path;
	/**
	 * Weight class of the last getFilterResult() call (0 = nothing found).
	 * @var int
	 */
	var $filter_weight = 0;
	/**
	 * Words found by the last getFilterResult() call.
	 * @var array
	 */
	var $filter_word;

	/** Current word-list version code, taken from $GLOBALS['db_wordsfb']. */
	var $code;
	/** Forbidden-word list (pattern => replacement); null until loadWords(). */
	var $fbwords = null;
	/** Replace-word list (pattern => replacement); null until loadWords(). */
	var $replace = null;
	/** Alarm-word list (pattern => replacement); null until loadWords(). */
	var $alarm = null;
	/** Pending ifwordsfb updates: [yes|no][type][] = id, flushed at shutdown. */
	var $_list = array();

	/**
	 * @param array $file optional array with keys 'dir', 'bin' and 'source';
	 *                    when empty the paths default to D_P.'data/bbscache/'.
	 */
	function __construct($file = array()) {
		if ($file) {
			$this->setFiles($file);
		} else {
			$this->dict_dir = D_P.'data/bbscache/';
			$this->dict_path = $this->dict_dir . 'wordsfb.php';
			$this->dict_bin_path = $this->dict_dir . 'dict_all.dat';
			$this->dict_source_path = $this->dict_dir . 'dict_all.txt';
		}
		$this->code = isset($GLOBALS['db_wordsfb']) ? $GLOBALS['db_wordsfb'] : null;
	}

	/**
	 * Legacy PHP4-style constructor name. PHP 8 no longer treats it as a
	 * constructor, so it only delegates; kept for callers that invoke it
	 * explicitly (e.g. parent::PW_FilterUtil()).
	 */
	function PW_FilterUtil($file = array()) {
		$this->__construct($file);
	}

	/**
	 * Replace the dictionary locations.
	 *
	 * @param array $file array with keys 'dir', 'bin' and 'source'
	 */
	function setFiles($file) {
		$this->dict_dir = $file['dir'];
		$this->dict_bin_path = $file['bin'];
		$this->dict_source_path = $file['source'];
	}

	/**
	 * Compute the ifwordsfb flag value for a string.
	 *
	 * @param string $str
	 * @return mixed the current version code when comprise() finds no listed
	 *               word, otherwise 0
	 */
	function ifwordsfb($str) {
		return ($this->comprise($str) === false) ? $this->code : 0;
	}

	/**
	 * @param mixed $currcode
	 * @return bool whether $currcode loosely equals the current version code
	 */
	function equal($currcode) {
		return ($currcode == $this->code);
	}

	/**
	 * Load the three word lists from the wordsfb cache once per instance.
	 * Missing entries (or a non-array cache result) become empty lists.
	 */
	function loadWords() {
		if (is_array($this->fbwords)) {
			return;
		}
		$data = pwCache::getData(D_P."data/bbscache/wordsfb.php", false);
		if (!is_array($data)) {
			$data = array();
		}
		$this->fbwords	= isset($data['wordsfb']) ? (array)$data['wordsfb'] : array();
		$this->replace	= isset($data['replace']) ? (array)$data['replace'] : array();
		$this->alarm	= isset($data['alarm']) ? (array)$data['alarm'] : array();
	}

	/**
	 * Return $str with every listed word replaced by its configured value.
	 *
	 * @param string $str
	 * @param array  $wdstruct optional record descriptor with keys 'type',
	 *                         'id' and 'code'. When given, the record is queued
	 *                         for an ifwordsfb update at shutdown: unchanged
	 *                         text goes to the 'yes' list, changed text with
	 *                         code > 0 goes to the 'no' list.
	 * @return string
	 */
	function convert($str, $wdstruct = array()) {
		$this->loadWords();
		$replacedb = $this->fbwords + $this->replace + $this->alarm;
		$msg = $str;
		foreach ($replacedb as $key => $value) {
			$replaced = preg_replace("/$key/i", $value, $msg);
			// preg_replace() yields NULL on a broken pattern; one bad entry
			// must not wipe the whole message.
			if ($replaced !== null) {
				$msg = $replaced;
			}
		}
		if ($wdstruct) {
			if ($msg == $str) {
				$this->addList('yes', $wdstruct['type'], $wdstruct['id']);
			} elseif (isset($wdstruct['code']) && $wdstruct['code'] > 0) {
				$this->addList('no', $wdstruct['type'], $wdstruct['id']);
			}
		}
		return $msg;
	}

	/**
	 * Queue a record id for updateWordsfb(); the first queued entry registers
	 * updateWordsfb() as a shutdown function.
	 */
	function addList($key, $type, $id) {
		if (empty($this->_list)) {
			register_shutdown_function(array($this, 'updateWordsfb'));
		}
		$this->_list[$key][$type][] = $id;
	}

	/**
	 * Flush the queued ifwordsfb updates: 'yes' records get the current
	 * version code, 'no' records get 0. The queue is emptied afterwards.
	 */
	function updateWordsfb() {
		if (!empty($this->_list['yes'])) {
			$this->_update($this->_list['yes'], $this->code);
		}
		if (!empty($this->_list['no'])) {
			$this->_update($this->_list['no'], 0);
		}
		$this->_list = array();
	}

	/**
	 * Private: write $val into ifwordsfb for each type => ids group.
	 *
	 * @param array $arr type => list of ids
	 * @param mixed $val value to store
	 */
	function _update($arr, $val) {
		foreach ($arr as $type => $ids) {
			list($table, $field) = $this->tablestruct($type);
			if ($table && $ids) {
				pwQuery::update("{$table}", "$field IN (:$field)", array($ids), array('ifwordsfb' => $val));
			}
		}
	}

	/**
	 * Map a content type to its table name and id column.
	 *
	 * @param string $type
	 * @return array array(table, field); array('', '') for an unknown type
	 */
	function tablestruct($type) {
		$struct = array(
			'topic'		=> array($GLOBALS['pw_tmsgs'], 'tid'),
			'posts'		=> array($GLOBALS['pw_posts'], 'pid'),
			'comments'	=> array('pw_comment', 'id'),
			'oboard'	=> array('pw_oboard','id'),
			'diary'		=> array('pw_diary','did')
		);
		return isset($struct[$type]) ? $struct[$type] : array('','');
	}

	/**
	 * Check whether a string contains a listed word.
	 *
	 * The forbidden list is always checked; the replace and alarm lists are
	 * checked only when the matching flag is true.
	 *
	 * @param string $str
	 * @param bool   $replace also check the replace list
	 * @param bool   $alarm   also check the alarm list
	 * @return mixed the first matching word (cleaned by getTrueBanword()),
	 *               or false when nothing matches or $str is empty
	 */
	function comprise($str, $replace = true,$alarm = true) {
		if (empty($str)) {
			return false;
		}
		$this->loadWords();
		$lists = array($this->fbwords);
		if ($replace) {
			$lists[] = $this->replace;
		}
		if ($alarm) {
			$lists[] = $this->alarm;
		}
		foreach ($lists as $words) {
			$hit = $this->_firstMatch((array)$words, $str);
			if ($hit !== null) {
				return $this->getTrueBanword($hit);
			}
		}
		return false;
	}

	/**
	 * Private: return the first key of $words that matches $str as a
	 * case-insensitive pattern, or null when none matches.
	 */
	function _firstMatch($words, $str) {
		foreach ($words as $key => $value) {
			if (preg_match("/$key/i", $str)) {
				return $key;
			}
		}
		return null;
	}

	/**
	 * Turn a stored pattern back into a readable word: strips slashes and
	 * removes every ".{0,N}" gap expression.
	 *
	 * @param string $word
	 * @return string
	 */
	function getTrueBanword($word) {
		$word = stripslashes($word);
		$word = preg_replace('/\.\{0\,(\d+)\}/i', '', $word);
		return $word;
	}

	/**
	 * Check whether the title or content contains an alarm word.
	 *
	 * Keys are wrapped as "/key/i", the same way comprise() and convert()
	 * use them; passing the bare key to preg_match() always failed.
	 *
	 * @param string $title
	 * @param string $content
	 * @return bool
	 */
	function alarm($title, $content = '') {
		$this->loadWords();
		if ($this->alarm) {
			foreach ($this->alarm as $key => $value) {
				$pattern = "/$key/i";
				if (preg_match($pattern, $title) || preg_match($pattern, $content)) {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Build the serialized dictionary from the text dictionary.
	 *
	 * @param array|null $path array with keys 'bin' and 'source'; defaults to
	 *                         this instance's paths
	 * @return mixed whatever Trie::build() returns
	 */
	function buildDict($path = null) {
		if ($path == null) {
			$path = array(
				'bin'    => $this->dict_bin_path,
				'source' => $this->dict_source_path
			);
		}
		$trie = new Trie($path);
		return $trie->build();
	}

	/**
	 * Scan content against the Trie dictionary and score the hits.
	 *
	 * Steps: strip HTML tags, optionally convert the character set
	 * (convertCode), optionally collapse gapped words (skipWords), search
	 * the Trie, then weigh the matches with Bayes.
	 *
	 * @param string      $content   text to check
	 * @param int         $skip      maximum gap length for skipWords(); < 1 disables it
	 * @param bool        $convert   run convertCode() first
	 * @param string|null $dict_path serialized dictionary path passed to Trie::search()
	 * @return mixed 0 when nothing matched, otherwise array(weight, matches)
	 */
	function paraseContent($content, $skip = 0, $convert = false, $dict_path = null) {
		$content = $this->filterHtml($content);

		if ($convert) {
			$content = $this->convertCode($content);
		}

		if ($skip >= 1) {
			$content = $this->skipWords(intval($skip), $content);
		}

		$trie = new Trie(array(
			'bin'    => $this->dict_bin_path,
			'source' => $this->dict_source_path
		));
		$result = $trie->search($content, $dict_path);
		if (empty($result)) {
			return 0;
		}

		$bayes = new Bayes();
		$weight = $bayes->getWeight($result);
		return array($weight, $result);
	}

	/**
	 * Run paraseContent() and store the outcome on the instance.
	 *
	 * Sets $filter_word to the unique matched words and $filter_weight to
	 * 1 (weight >= 1), 2 (weight >= 0.8) or 3 (lower); both are reset when
	 * nothing matches. The optional arguments are forwarded to
	 * paraseContent() (they used to be silently ignored).
	 *
	 * @param string      $content
	 * @param int         $skip
	 * @param bool        $convert
	 * @param string|null $dict_path
	 */
	function getFilterResult($content, $skip = 0, $convert = false, $dict_path = null ) {
		$result = $this->paraseContent($content, $skip, $convert, $dict_path);
		$words = array();
		// Reset so a previous hit on this instance cannot leak into this result.
		$this->filter_weight = 0;
		if (is_array($result)) {
			foreach ($result[1] as $key => $value) {
				$words[$key] = $value[0];
			}
			$words = array_unique($words);

			$this->filter_weight = $result[0] >= 1 ? 1 : ($result[0] >= 0.8 ? 2 : 3);
		}
		$this->filter_word = $words;
	}

	/**
	 * Insert or refresh a pw_filter record.
	 *
	 * A new row is inserted when none exists for (tid, pid); state 3 is
	 * stored with assessor 'SYSTEM', any other state is stored as 0. An
	 * existing row is reset to state 0 only when its state is 1 or 2.
	 *
	 * @param int    $tid    topic id
	 * @param int    $pid    post id
	 * @param string $filter matched words
	 * @param int    $state
	 */
	function insert($tid, $pid, $filter, $state=0) {
		global $db,$timestamp;

		$where = "tid=" . S::sqlEscape($tid) . " AND pid=" . S::sqlEscape($pid);
		$record = $db->get_one("SELECT id,state FROM pw_filter WHERE " . $where);

		if (!$record) {
			$value = array(
				'tid'    => $tid,
				'pid'    => $pid,
				'filter' => $filter,
				'state'  => ($state!=3 ? 0 : 3),
				'assessor'=> ($state!=3 ? '' : 'SYSTEM'),
				'created_at' => $timestamp,
				'updated_at' => $timestamp,
			);
			$db->update("INSERT INTO pw_filter SET " . S::sqlSingle($value));
			return;
		}

		if ($record['state'] == 2 || $record['state'] == 1) {
			$value = S::sqlSingle(array(
				'state'  => 0,
				'filter' => $filter,
				'created_at' => $timestamp,
			));
			$db->update("UPDATE pw_filter SET {$value} WHERE " . $where);
		}
	}

	/**
	 * Delete the pw_filter record of (tid, pid).
	 *
	 * @param int $tid topic id
	 * @param int $pid post id
	 */
	function delete($tid, $pid) {
		global $db;
		$db->update("DELETE FROM pw_filter WHERE tid=" . S::sqlEscape($tid) . " AND pid=" . S::sqlEscape($pid));
	}

	/**
	 * Remove ASCII control characters, whitespace and punctuation, keeping
	 * digits, ASCII letters, DEL (0x7F) and every byte above 0x7F.
	 *
	 * The previous range test started at 59, so ':' (58) slipped through.
	 *
	 * @param string $content
	 * @return string
	 */
	function filterSymbol($content) {
		// Byte-wise (no /u): 0x00-0x2F, 0x3A-0x40, 0x5B-0x60, 0x7B-0x7E.
		return preg_replace('/[\x00-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]/', '', (string)$content);
	}

	/**
	 * Convert content between character sets with the project's Chinese class.
	 *
	 * @param string      $content
	 * @param string      $fcode    source code name, default 'CHST'
	 * @param string      $tcode    target code name, default 'CHSS'
	 * @param string|null $dict_dir unused; kept for interface compatibility
	 * @return string converted text
	 */
	function convertCode($content, $fcode = 'CHST', $tcode = 'CHSS', $dict_dir = null) {
		L::loadClass('Chinese', 'utility/lang', false);
		$ch = new Chinese($fcode, $tcode, true);
		return $ch->Convert($content);
	}

	/**
	 * Collapse dictionary words that were split up with filler characters.
	 *
	 * For every word of the dictionary file a pattern is built that allows up
	 * to $skip arbitrary bytes before each non-first multi-byte character;
	 * every match in the content is replaced by the plain word.
	 *
	 * Fixes over the previous version: single-character words no longer
	 * produce a delimiter-less pattern (which made preg_replace() return NULL
	 * and wiped the content), every character is escaped with preg_quote(),
	 * the replacement is escaped, blank/unreadable input is skipped and the
	 * file handle is checked.
	 *
	 * NOTE(review): any byte > 127 is treated as the lead byte of a 2-byte
	 * character, as the original did; this presumably targets GBK/Big5 and
	 * is not UTF-8 aware - confirm against the site charset.
	 *
	 * @param int         $skip     maximum gap length
	 * @param string      $content  text to process
	 * @param string|null $dict_dir dictionary source file; defaults to
	 *                              $this->dict_source_path
	 * @return string processed text
	 */
	function skipWords($skip, $content, $dict_dir=null) {
		$ret = $content;
		if (is_null($dict_dir)) {
			$dict_dir = $this->dict_source_path;
		}
		$skip = max(0, intval($skip));

		$handle = (is_string($dict_dir) && $dict_dir !== '' && is_readable($dict_dir))
			? fopen($dict_dir, "r")
			: false;
		if (!$handle) {
			return $ret;
		}
		while (($line = fgets($handle)) !== false) {
			$word = $this->_dictWord($line);
			if ($word === '') {
				continue;
			}
			// '\' and '$' would otherwise be read as back-references.
			$replaced = preg_replace($this->_skipPattern($word, $skip), addcslashes($word, '\\$'), $ret);
			if ($replaced !== null) {
				$ret = $replaced;
			}
		}
		fclose($handle);
		return $ret;
	}

	/**
	 * Private: extract the word from one dictionary line.
	 *
	 * Accepts "word|weight" (the format Trie::split() parses from the same
	 * source file) and the older "word<whitespace>weight" form.
	 */
	function _dictWord($line) {
		$line = trim($line);
		if (($pos = strrpos($line, '|')) !== false) {
			return trim((string)substr($line, 0, $pos));
		}
		if (preg_match('/^(.*?)\s+/', $line, $m)) {
			return $m[1];
		}
		return $line;
	}

	/**
	 * Private: build the gap-tolerant pattern for one word; a
	 * "(.{0,$skip}?)" gap is inserted before each non-first multi-byte
	 * character, ASCII characters are matched literally without a gap.
	 */
	function _skipPattern($word, $skip) {
		$pattern = '';
		$len = strlen($word);
		for ($i = 0; $i < $len; $i++) {
			if (ord($word[$i]) > 127) {
				if ($i > 0) {
					$pattern .= "(.{0," . $skip . "}?)";
				}
				$pattern .= preg_quote(substr($word, $i, 2), '/');
				$i++; // consume the trail byte as well
			} else {
				$pattern .= preg_quote($word[$i], '/');
			}
		}
		return "/" . $pattern . "/";
	}

	/**
	 * Strip forum markup tags ([post], [hide=], [sell=] blocks together with
	 * their content, then any remaining [tag] / [/tag]) from the text.
	 *
	 * @param string $content
	 * @return string trimmed text without the tags
	 */
	function filterWindCode($content) {
		$pattern = array();
		if (strpos($content,"[post]")!==false && strpos($content,"[/post]")!==false) {
			$pattern[] = "/\[post\].+?\[\/post\]/is";
		}
		if (strpos($content,"[hide=")!==false && strpos($content,"[/hide]")!==false) {
			$pattern[] = "/\[hide=.+?\].+?\[\/hide\]/is";
		}
		if (strpos($content,"[sell")!==false && strpos($content,"[/sell]")!==false) {
			$pattern[] = "/\[sell=.+?\].+?\[\/sell\]/is";
		}
		$pattern[] = "/\[[a-zA-Z]+[^]]*?\]/is";
		$pattern[] = "/\[\/[a-zA-Z]*[^]]\]/is";

		return trim(preg_replace($pattern,'',$content));
	}

	/**
	 * Strip all HTML tags from the text.
	 *
	 * @param string $content
	 * @return string
	 */
	function filterHtml($content) {
		return strip_tags($content);
	}

	/**
	 * Drop every 2-byte GBK character whose lead byte is in 0xA0-0xA9.
	 * NOTE(review): that range presumably holds the GBK symbol/punctuation
	 * rows - confirm against the GBK code table.
	 *
	 * When iconv cannot measure the input (invalid GBK) the content is
	 * returned unchanged instead of an empty string.
	 *
	 * @param string $content
	 * @return string
	 */
	function filterChineseCode($content) {
		$length = iconv_strlen($content,"GBK");
		if ($length === false) {
			return $content;
		}
		$ret = "";
		for ($i = 0; $i < $length; $i++) {
			$char = (string)iconv_substr($content, $i, 1, "GBK");
			if (strlen($char) == 2) {
				$lead = ord($char[0]);
				if ($lead >= 0xA0 && $lead <= 0xA9) {
					continue;
				}
			}
			$ret .= $char;
		}
		return $ret;
	}
}

/**
 * Byte-level trie over a word dictionary.
 *
 * The text dictionary holds one "word|weight" entry per line (see split());
 * build() turns it into a serialized node array, search() loads that array
 * and reports every dictionary word found in a text.
 */
class Trie {
	// Default path of the serialized dictionary.
	var $default_out_path ;
	// Default path of the plain-text dictionary.
	var $default_dict_path ;
	// Node array. Each node is array(isTerminal, children[byte => nodeIndex])
	// plus, for terminal nodes, index 2 holding the weight.
	var $nodes ;

	/**
	 * @param array $file array with keys 'bin' (serialized dictionary path)
	 *                    and 'source' (text dictionary path)
	 */
	function __construct($file = array()) {
		$this->default_out_path  = isset($file['bin']) ? $file['bin'] : null;
		$this->default_dict_path = isset($file['source']) ? $file['source'] : null;
	}

	/**
	 * Legacy PHP4-style constructor name. PHP 8 no longer treats it as a
	 * constructor, so it only delegates; kept for explicit callers.
	 */
	function Trie($file = array()) {
		$this->__construct($file);
	}

	/**
	 * Build the trie from the text dictionary and store it serialized.
	 *
	 * @param string|null $path     text dictionary path (default: constructor 'source')
	 * @param string|null $out_path output path (default: constructor 'bin')
	 * @return mixed bytes written, false when writing failed, null when the
	 *               tree is empty (see putBinaryDict())
	 */
	function build($path = null, $out_path = null) {
		if (empty($path)) {
			$path = $this->default_dict_path;
		}
		if (empty($out_path)) {
			$out_path = $this->default_out_path;
		}

		$words = $this->getDict($path);
		$tree = $this->getTree($words);
		return $this->putBinaryDict($out_path, $tree);
	}

	/**
	 * Find dictionary words in a text.
	 *
	 * @param string      $content   text to search
	 * @param string|null $dict_path serialized dictionary path (default: constructor 'bin')
	 * @return mixed matches as returned by match(), or false when the
	 *               dictionary could not be loaded or is empty
	 */
	function search($content, $dict_path = null) {
		if (empty($dict_path)) {
			$dict_path = $this->default_out_path;
		}
		$words = $this->getBinaryDict($dict_path);
		if (!$words) {
			return false;
		}
		$this->nodes = $words;
		return $this->match($content);
	}

	/**
	 * Read the text dictionary into an array of trimmed lines.
	 * Blank lines are skipped; an unreadable file yields an empty array.
	 *
	 * @param string $path
	 * @return array
	 */
	function getDict($path) {
		$words = array();
		if (!is_string($path) || $path === '' || !is_readable($path)) {
			return $words;
		}
		$handle = fopen($path, "r");
		if ($handle == false) {
			return $words;
		}
		while (($line = fgets($handle)) !== false) {
			$line = trim($line);
			if ($line !== '') {
				$words[] = $line;
			}
		}
		fclose($handle);
		return $words;
	}

	/**
	 * Load and unserialize the stored dictionary.
	 *
	 * NOTE(review): unserialize() runs on file content; the file must only
	 * ever be written by putBinaryDict(), never by untrusted input.
	 *
	 * @param string|null $path (default: constructor 'bin')
	 * @return mixed the node array, or an empty array when the file is empty/missing
	 */
	function getBinaryDict($path = null) {
		if (empty($path)) {
			$path = $this->default_out_path;
		}
		$words = readover($path);
		if (!$words) {
			return array();
		}
		return unserialize($words);
	}

	/**
	 * Serialize the node array and write it to disk.
	 *
	 * @param string $path  output path (default: constructor 'bin')
	 * @param array  $words node array
	 * @return mixed bytes written, false on failure, null when $words is empty
	 */
	function putBinaryDict($path, $words) {
		if (empty($path)) {
			$path = $this->default_out_path;
		}
		if (!$words) {
			return ;
		}
		$handle = fopen($path, 'wb');
		if ($handle == false) {
			return false;
		}
		$ret = fwrite($handle, serialize($words));
		// Close before reporting, so a failed write does not leak the handle.
		fclose($handle);
		if ($ret == false) {
			return false;
		}
		return $ret;
	}

	/**
	 * Build the node array from dictionary lines.
	 *
	 * @param array $words lines in "word|weight" form
	 * @return array the node array (also stored in $this->nodes)
	 */
	function getTree($words) {
		$this->nodes = array( array(false, array()) ); // root node
		$next = 1; // index of the next node to create
		foreach ($words as $line) {
			list($word, $weight) = $this->split($line);
			$len = strlen($word);
			if ($len == 0) {
				// An empty word would flag the root itself as a match.
				continue;
			}
			$cur = 0;
			for ($i = 0; $i < $len; $i++) {
				$c = ord($word[$i]);
				if (!isset($this->nodes[$cur][1][$c])) {
					$this->nodes[$next] = array(false, array());
					$this->nodes[$cur][1][$c] = $next;
					$next++;
				}
				$cur = $this->nodes[$cur][1][$c];
			}
			$this->nodes[$cur][0] = true; // end of a word
			$this->nodes[$cur][2] = trim($weight); // weight lives on the terminal node
		}
		return $this->nodes;
	}

	/**
	 * Split a dictionary line at its last '|'.
	 *
	 * @param string $str
	 * @return array array(word, weight); weight is 0 when there is no '|'
	 */
	function split($str) {
		if (($pos = strrpos($str, '|')) === false) {
			return array($str, 0);
		}
		return array(substr($str, 0, $pos), substr($str, $pos+1));
	}

	/**
	 * Scan $s byte by byte and collect every dictionary word found.
	 * The shortest word starting at a position wins, and scanning resumes
	 * right after it.
	 *
	 * @param string $s text to scan
	 * @return array start offset => array(matched text, weight)
	 */
	function match($s) {
		$charset = isset($GLOBALS['db_charset']) ? (string)$GLOBALS['db_charset'] : '';
		$isUTF8 = strtoupper(substr($charset, 0, 3)) === 'UTF';
		$ret = array();
		$cur = 0; // current node, starts at the root
		$i = 0;   // read position
		$p = 0;   // start of the current attempt
		$len = strlen($s);
		while ($i < $len) {
			$c = ord($s[$i]);
			if (isset($this->nodes[$cur][1][$c])) {
				$cur = $this->nodes[$cur][1][$c];
				if ($this->nodes[$cur][0]) {
					$ret[$p] = array(substr($s, $p, $i - $p + 1), $this->nodes[$cur][2]);
					$p = $i + 1;
					$cur = 0;
				}
				$i++;
				continue;
			}
			// Mismatch: restart one character after the attempt's start.
			$cur = 0;
			// For non-UTF charsets two consecutive high bytes are skipped as one
			// character; the bounds check avoids reading past the end.
			if (!$isUTF8 && $p + 1 < $len && ord($s[$p]) > 127 && ord($s[$p+1]) > 127) {
				$p += 2;
			} else {
				$p += 1;
			}
			$i = $p;
		}
		return $ret;
	}
}

/**
 * Combines the weights of the matched words into one score with a
 * Bayes-style formula (word frequency is not taken into account).
 *
 * For matched words t1..tn with weights w1..wn:
 *   p1 = w1 * w2 * ... * wn
 *   p2 = (1 - w1) * (1 - w2) * ... * (1 - wn)
 *   w  = p1 / (p1 + p2)
 * When p1 + p2 is 0 the score is 1.
 * Weights below 0.5 pull the score down, weights above 0.5 push it up:
 *   0.9, 0.8, 0.5, 0.6 gives about 0.98
 *   0.9, 0.8, 0.5, 0.1 gives 0.8
 */
class Bayes {

	/**
	 * Compute the combined weight of a set of matches.
	 *
	 * @param array $keys matches; each entry carries its weight at index 1.
	 *                    Entries with an empty weight are ignored.
	 * @return mixed the combined weight, or 1 when p1 + p2 is 0
	 */
	function getWeight($keys) {
		$hit = 1;
		$miss = 1;
		foreach ($keys as $entry) {
			if (!empty($entry[1])) {
				$w = floatval($entry[1]);
				$hit *= $w;
				$miss *= (1 - $w);
			}
		}

		$total = $hit + $miss;
		if ($total == 0) {
			return 1;
		}
		return $hit / $total;
	}
}
?>