set_options($options);

        //Define the gzip headers
        $this->set_gzip_headers();

        //Start things off
        $this->start();
    }

    /**
     * Store the compression options.
     *
     * Options can be set with a simple comma separated string or a full array
     * String e.g : ('css,javascript,page')
     * Array e.g: array("javascript"=>array("cachedir"=>"/minify/", "gzip"=>true, "minify"=>true),
     *                  "css"=>array("cachedir"=>"/minify/", "gzip"=>true, "minify"=>true),
     *                  "page"=>array("gzip"=>true, "minify"=>true));
     *
     * String form: every recognised name gets gzip and minify switched on and,
     * for javascript/css, the directory of the current request as cachedir.
     * Names without a default entry are ignored.
     * Trailing slashes are removed from every cachedir, and show_timer is
     * always reset to false.
     *
     * @param mixed $options comma separated string or nested array (see above)
     **/
    function set_options($options) {
        if (is_array($options)) {
            $this->options = $options;
        } else {
            //Get current directory
            $current_dir = $this->get_current_path(true);

            $full_options = array(
                "javascript" => array("cachedir" => $current_dir, "gzip" => true, "minify" => true),
                "css"        => array("cachedir" => $current_dir, "gzip" => true, "minify" => true),
                "page"       => array("gzip" => true, "minify" => true),
            );

            if (!isset($this->options) || !is_array($this->options)) {
                $this->options = array();
            }

            $options_array = array_map('trim', explode(",", (string)$options));
            sort($options_array); //Alphabetical order keeps "page" last

            foreach ($options_array as $value) {
                //Skip unknown names instead of storing a null entry for them
                if (isset($full_options[$value])) {
                    $this->options[$value] = $full_options[$value];
                }
            }
        }

        //Make sure cachedir does not have trailing slash
        //("page" has no cachedir at all, so only touch entries that define one)
        foreach ($this->options as $key => $option) {
            if (is_array($option) && isset($option['cachedir'])) {
                $this->options[$key]['cachedir'] = rtrim($option['cachedir'], "/");
            }
        }

        $this->options['show_timer'] = false; //time the javascript and css compression?
    }

    /**
     * Start saving the output buffer
     *
     **/
    function start() {
        ob_start();
    }

    /**
     * Do work and return output buffer
     *
     * Takes the buffered page, runs the method named after each configured
     * option (javascript, css, page), removes stale cache files and echoes the
     * resulting content.
     **/
    function finish() {
        $this->runtime = $this->startTimer();
        $this->times['start_compress'] = $this->returnTime($this->runtime);

        $this->content = ob_get_clean();
        if ($this->content === false) {
            //No buffer was active; work on an empty page rather than on "false"
            $this->content = '';
        }

        //Run the functions specified in options
        //(scalar entries such as show_timer are settings, not handlers)
        foreach ($this->options as $func => $option) {
            if (is_array($option) && method_exists($this, $func)) {
                $this->$func($option, $func);
            }
        }

        //Make a string with the names of the compressed files
        $this->compressed_files_string = '';
        if (isset($this->compressed_files) && is_array($this->compressed_files)) {
            $this->compressed_files_string = implode("", $this->compressed_files);
        }

        //Delete any cache files that don't match the string
        $this->do_cleanup();

        $this->times['end'] = $this->returnTime($this->runtime);

        //Echo content to the browser
        echo $this->content;

        //Show compress time (plain text cannot be appended to a gzipped page)
        if (!empty($this->options['show_timer']) && empty($this->options['page']['gzip'])) {
            echo "Compress took " . number_format($this->times['end'], 2) . " seconds";
        }
    }

    /**
     * GZIP and minify the javascript as required
     *
     * @param array  $options the "javascript" entry of the options array
     * @param string $type    the option name, used as header key and cache file prefix
     **/
    function javascript($options, $type) {
        $this->content = $this->do_compress(
            array(
                'cachedir'   => isset($options['cachedir']) ? $options['cachedir'] : '',
                'tag'        => 'script',
                'type'       => 'text/javascript',
                'ext'        => 'js',
                'src'        => 'src',
                'self_close' => false,
                'gzip'       => !empty($options['gzip']),
                'minify'     => !empty($options['minify']),
                'header'     => $type,
                'save_name'  => $type,
            ),
            $this->content
        );
    }

    /**
     * GZIP and minify the CSS as required
     *
     * Stylesheets are combined separately per media attribute. To keep the
     * groups apart, the type attribute of every <link> carrying a media value
     * is temporarily rewritten to "marker%%%<media>" and restored afterwards.
     *
     * @param array  $options the "css" entry of the options array
     * @param string $type    the option name, used as header key and cache file prefix
     **/
    function css($options, $type) {
        //Add default media type
        $media_types = array();
        $media_types[] = array("name" => "", "type" => "text/css");

        //Get any media types in the document
        $matches = array();
        $head = $this->get_head($this->content);
        if ($head) {
            preg_match_all("!<link[^>]+media=[\"'\s](.*?)[\"'\s][^>]+>!is", $head, $matches);
        }

        //Run through the media types and sub in markers
        if (!empty($matches[0])) {
            foreach ($matches[0] as $key => $value) {
                if (strpos($value, "stylesheet") === false) {
                    continue; //make sure it's a style sheet
                }
                $media = $matches[1][$key];
                $thevalues = array(
                    "code"   => $value,
                    "marker" => str_replace("text/css", "marker%%%" . $media, $value),
                    "name"   => $media,
                    "type"   => "marker%%%" . $media,
                );
                $media_types[$media] = $thevalues;
                //Add in marker
                $this->content = str_replace($thevalues['code'], $thevalues['marker'], $this->content);
            }
        }

        //Compress separately for each media type
        foreach ($media_types as $value) {
            $this->content = $this->do_compress(
                array(
                    'cachedir'   => isset($options['cachedir']) ? $options['cachedir'] : '',
                    'tag'        => 'link',
                    'type'       => $value['type'],
                    'ext'        => 'css',
                    'src'        => 'href',
                    'rel'        => 'stylesheet',
                    'media'      => $value['name'],
                    'self_close' => true,
                    'gzip'       => !empty($options['gzip']),
                    'minify'     => !empty($options['minify']),
                    'header'     => $type,
                    'save_name'  => $type . $value['name'],
                ),
                $this->content
            );
            //Replace out the markers
            $this->content = str_replace($value['type'], 'text/css', $this->content);
        }
    }

    /**
     * GZIP and minify the page itself as required
     *
     * @param array  $options the "page" entry of the options array
     * @param string $type    the option name (unused here)
     **/
    function page($options, $type) {
        //Minify page itself
        if (!empty($options['minify'])) {
            $this->content = $this->trimwhitespace($this->content);
        }

        //Gzip page itself, but only for clients that announce gzip support
        $accepts_gzip = isset($_SERVER['HTTP_ACCEPT_ENCODING'])
            && strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') !== false;

        if (!empty($options['gzip']) && $accepts_gzip && function_exists('gzencode') && !headers_sent()) {
            //gzencode() emits a complete gzip stream (header, deflate data,
            //CRC32 and size), replacing the hand-assembled one
            $encoded = gzencode($this->content, 3);
            if ($encoded !== false) {
                header('Content-Encoding: gzip');
                header('Vary: Accept-Encoding');
                $this->content = $encoded;
            }
        }
    }

    /**
     * Compress JS or CSS and return source
     *
     * Finds the matching tags inside <head>, combines the local files they
     * point to into one cache file and replaces the tags with a single tag
     * for that file. If nothing can be combined, or the cache file cannot be
     * written, $source is returned unchanged.
     *
     * @param array  $options keys read here: cachedir, tag, type, ext, src, gzip,
     *                        minify, header, save_name (rel, media and self_close
     *                        are used by get_new_file)
     * @param string $source  the page HTML
     * @return string the page HTML with the tags swapped
     **/
    function do_compress($options, $source) {
        //Change the extension: gzipped files are served through a PHP prologue
        if (!empty($options['gzip'])) {
            $options['ext'] = "php";
        }
        $cachedir = $_SERVER['DOCUMENT_ROOT'] . $options['cachedir'];

        $matches = array();
        $head = $this->get_head($source);
        if ($head) {
            $tag_name = preg_quote($options['tag'], '!');
            $regex = "!<" . $tag_name . "[^>]+" . preg_quote($options['type'], '!')
                   . "[^>]+>(</" . $tag_name . ">)?!is";
            preg_match_all($regex, $head, $matches);
        }
        if (empty($matches[0])) {
            //Nothing to do
            return $source;
        }

        //Drop tags without a source and any externally linked files
        $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
        $script_array = array();
        foreach ($matches[0] as $tag) {
            $src = $this->_get_attribute($tag, $options['src']);
            if ($src === '') {
                continue;
            }
            $url_parts = $this->_split_url($src);
            if ($url_parts[0] !== '' && strcasecmp($url_parts[0], $host) != 0) {
                continue;
            }
            $script_array[] = $tag;
        }

        //Only tags whose file is really on disk take part, so the cached and
        //the freshly built result always replace the same set of tags
        $tags = array();
        $paths = array();
        $locations = $this->get_file_locations($script_array, $options);
        if (is_array($locations)) {
            foreach ($locations as $info) {
                if (is_file($info['src'])) {
                    $tags[] = $info['location'];
                    $paths[] = $info['src'];
                }
            }
        }
        if (!$tags) {
            return $source;
        }

        //Get date string for making hash
        $datestring = $this->get_file_dates($tags, $options);

        //Get the cache hash
        $cache_file = '_cmp_' . $options['save_name'] . '_' . md5(implode("_", $tags) . $datestring);
        $cache_path = $cachedir . '/' . $cache_file . '.' . $options['ext'];

        //Work on a copy so a failure below leaves the page untouched
        $stripped = $this->_remove_scripts($tags, $source);
        if (strpos($stripped, "@@marker@@") === false) {
            return $source;
        }

        //Create the cache file if it does not exist yet
        if (!file_exists($cache_path)) {
            $contents = '';
            foreach ($paths as $path) {
                $contents .= file_get_contents($path) . "\n";
            }

            //Allow for minification of javascript (JSMin needs PHP 5 or later)
            if ($options['header'] == "javascript" && !empty($options['minify'])
                && version_compare(PHP_VERSION, '5.0.0', '>=')) {
                require_once('jsmin.php');
                $jsmin = new JSMin($contents);
                $contents = $jsmin->minify($contents);
            }

            //Allow for minification of CSS
            if ($options['header'] == "css" && !empty($options['minify'])) {
                $contents = $this->minify_text($contents);
            }

            //Allow for gzipping
            if (!empty($options['gzip']) && isset($this->gzip_header[$options['header']])) {
                $contents = $this->gzip_header[$options['header']] . $contents;
            }

            //Write to cache
            $fp = fopen($cache_path, 'wb');
            if (!$fp) {
                return $source;
            }
            fwrite($fp, $contents);
            fclose($fp);
        }

        //Create the link to the new file
        return str_replace("@@marker@@", $this->get_new_file($options, $cache_file), $stripped);
    }

    /**
     * Returns the value of an attribute (src/href) of a tag, or '' when the
     * tag does not carry it. Both double and single quoted values are read.
     *
     * @param string $tag       the complete tag
     * @param string $attribute attribute name
     * @return string
     */
    function _get_attribute($tag, $attribute) {
        $pattern = "!" . preg_quote($attribute, '!') . "\s*=\s*([\"'])(.*?)\\1!is";
        if (preg_match($pattern, $tag, $found)) {
            return $found[2];
        }
        return '';
    }

    /**
     * Splits a URL into array(host, rest). The host is '' for URLs that do not
     * start with http://, https:// or //.
     *
     * @param string $url
     * @return array
     */
    function _split_url($url) {
        if (preg_match('!^(?:https?:)?//([^/?#]*)(.*)$!is', $url, $parts)) {
            return array($parts[1], $parts[2]);
        }
        return array('', $url);
    }

    /**
     * Replaces the script or css links in the source with a marker
     *
     * Every tag is removed; the first occurrence of the last tag becomes
     * "@@marker@@" so the combined file ends up where the last original was.
     *
     * @param array  $script_array the tags to remove
     * @param string $source       the page HTML
     * @return string
     */
    function _remove_scripts($script_array, $source) {
        if (!is_array($script_array)) {
            return $source;
        }
        //Work on distinct tags: str_replace removes every occurrence at once
        $tags = array_values(array_unique($script_array));
        if (!$tags) {
            return $source;
        }

        $last_tag = array_pop($tags);
        foreach ($tags as $tag) {
            $source = str_replace($tag, "", $source);
        }

        $position = strpos($source, $last_tag);
        if ($position !== false) {
            $source = substr_replace($source, "@@marker@@", $position, strlen($last_tag));
            $source = str_replace($last_tag, "", $source);
        }

        return $source;
    }

    /**
     * Returns the tag that links to our new compressed file and records it in
     * $this->compressed_files.
     *
     * @param array  $options    same array as passed to do_compress
     * @param string $cache_file cache file name without extension
     * @return string
     **/
    function get_new_file($options, $cache_file) {
        //Link with the scheme of the current request to avoid mixed content
        $scheme = (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) != 'off') ? 'https' : 'http';
        $url = $scheme . "://" . $_SERVER['HTTP_HOST'] . $options['cachedir'] . "/" . $cache_file . "." . $options['ext'];

        $newfile = "<" . $options['tag'] . " type=\"" . $options['type'] . "\" " . $options['src'] . "=\"" . $url . "\"";

        if (!empty($options['rel'])) {
            $newfile .= " rel=\"" . $options['rel'] . "\"";
        }

        if (!empty($options['media'])) {
            $newfile .= " media=\"" . $options['media'] . "\"";
        }

        if (!empty($options['self_close'])) {
            $newfile .= " />";
        } else {
            //Non-void elements such as <script> need an explicit end tag
            $newfile .= "></" . $options['tag'] . ">";
        }

        $this->compressed_files[] = $newfile;

        return $newfile;
    }

    /**
     * Returns the last modified dates of the files being compressed
     * In this way we can see if any changes have been made
     *
     * @param array $files   the tags
     * @param array $options needs the 'src' key
     * @return string modification times joined with ".", '' when no file exists
     **/
    function get_file_dates($files, $options) {
        $files = $this->get_file_locations($files, $options);
        if (!is_array($files)) {
            return '';
        }

        $dates = array();
        foreach ($files as $value) {
            if (file_exists($value['src'])) {
                $dates[] = filemtime($value['src']);
            }
        }

        return implode(".", $dates);
    }

    /**
     * Gets the path locations of the scripts being compressed
     *
     * URLs on the current host are reduced to their path. Paths starting with
     * "/" are resolved against DOCUMENT_ROOT, everything else against the
     * directory of the current request.
     *
     * @param array $script_array the tags
     * @param array $options      needs the 'src' key (attribute name)
     * @return array list of array('src' => file path, 'location' => original tag)
     **/
    function get_file_locations($script_array, $options) {
        $return_array = array();
        if (!is_array($script_array)) {
            return $return_array;
        }

        $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';

        foreach ($script_array as $value) {
            //Get the src; tags without one are skipped
            $src = $this->_get_attribute($value, $options['src']);
            if ($src === '') {
                continue;
            }

            $url_parts = $this->_split_url($src);
            if ($url_parts[0] !== '' && strcasecmp($url_parts[0], $host) == 0) {
                $src = $url_parts[1];
            }

            if (substr($src, 0, 1) == "/") {
                $current_src = $_SERVER['DOCUMENT_ROOT'] . $src;
            } else {
                $current_src = $_SERVER['DOCUMENT_ROOT'] . $this->get_current_path() . $src;
            }

            $return_array[] = array('src' => $current_src, 'location' => $value);
        }

        return $return_array;
    }

    /**
     * Sets the headers to be sent in the javascript and css files
     *
     * The strings are prepended to gzipped cache files, which do_compress
     * saves with a .php extension.
     *
     * NOTE(review): both strings were empty in this copy of the file, which
     * looks like lost content. The prologue below is a reconstruction -
     * confirm against the upstream source before relying on it.
     **/
    function set_gzip_headers() {
        $prologue = '<?php ob_start("ob_gzhandler"); '
                  . 'header("Content-type: %s; charset=UTF-8"); '
                  . 'header("Cache-Control: must-revalidate"); '
                  . 'header("Expires: " . gmdate("D, d M Y H:i:s", time() + 31536000) . " GMT"); ?>';

        $this->gzip_header['javascript'] = sprintf($prologue, 'text/javascript');
        $this->gzip_header['css'] = sprintf($prologue, 'text/css');
    }

    /**
     * Strips whitespace and comments from a text string
     *
     **/
    function minify_text($txt) {
        // Compress whitespace.
        $txt = preg_replace('/\s+/', ' ', $txt);
        // Remove comments.
$txt = preg_replace('/\/\*.*?\*\//', '', $txt);

        return $txt;
    }

    /**
     * Safely trim whitespace from an HTML page
     * Adapted from smarty code http://www.smarty.net/
     *
     * <script>, <pre> and <textarea> blocks are swapped for placeholders
     * first, so their content is left untouched, and put back at the end.
     *
     * @param string $source the page HTML
     * @return string
     **/
    function trimwhitespace($source) {
        // Pull out the script blocks
        preg_match_all("!<script[^>]*>.*?</script>!is", $source, $match);
        $_script_blocks = $match[0];
        $source = preg_replace("!<script[^>]*>.*?</script>!is", '@@@COMPRESSOR:TRIM:SCRIPT@@@', $source);

        // Pull out the pre blocks
        preg_match_all("!<pre\b[^>]*>.*?</pre>!is", $source, $match);
        $_pre_blocks = $match[0];
        $source = preg_replace("!<pre\b[^>]*>.*?</pre>!is", '@@@COMPRESSOR:TRIM:PRE@@@', $source);

        // Pull out the textarea blocks
        preg_match_all("!<textarea[^>]*>.*?</textarea>!is", $source, $match);
        $_textarea_blocks = $match[0];
        $source = preg_replace("!<textarea[^>]*>.*?</textarea>!is", '@@@COMPRESSOR:TRIM:TEXTAREA@@@', $source);

        // remove all leading spaces, tabs and carriage returns NOT
        // preceded by a php close tag.
        $source = trim(preg_replace('/((?<!\?>)\n)[\s]+/m', '\1', $source));

        //Remove comments
        $source = preg_replace("/<!--.*-->/U", "", $source);

        // Placeholders are restored in reverse order of extraction, because a
        // block pulled out later may itself contain earlier placeholders.

        // replace textarea blocks
        $this->trimwhitespace_replace("@@@COMPRESSOR:TRIM:TEXTAREA@@@", $_textarea_blocks, $source);

        // replace pre blocks
        $this->trimwhitespace_replace("@@@COMPRESSOR:TRIM:PRE@@@", $_pre_blocks, $source);

        // replace script blocks
        $this->trimwhitespace_replace("@@@COMPRESSOR:TRIM:SCRIPT@@@", $_script_blocks, $source);

        return $source;
    }

    /**
     * Helper function for trimwhitespace
     *
     * Replaces successive occurrences of $search_str in $subject with the
     * entries of $replace, in order.
     *
     * @param string $search_str the placeholder
     * @param array  $replace    the original blocks, in document order
     * @param string $subject    modified in place
     **/
    function trimwhitespace_replace($search_str, $replace, &$subject) {
        $_len = strlen($search_str);
        $_pos = 0;
        for ($_i = 0, $_count = count($replace); $_i < $_count; $_i++) {
            $_pos = strpos($subject, $search_str, $_pos);
            if ($_pos === false) {
                break;
            }
            $subject = substr_replace($subject, $replace[$_i], $_pos, $_len);
            //Continue after the restored block so placeholder-like text
            //inside it is never picked up
            $_pos += strlen($replace[$_i]);
        }
    }

    /**
     * Gets the directory we are in
     *
     * @param bool $trailing when true the trailing slash is removed
     * @return string directory part of the request URI, '' when it has none
     **/
    function get_current_path($trailing=false) {
        $uri = isset($_SERVER['REQUEST_URI']) ? (string)$_SERVER['REQUEST_URI'] : '';

        //Ignore the query string: slashes inside it are not directories
        $query_start = strpos($uri, '?');
        if ($query_start !== false) {
            $uri = substr($uri, 0, $query_start);
        }

        $current_dir = '';
        if (preg_match("@.*\/@", $uri, $matches)) {
            $current_dir = $matches[0];
        }

        //Remove trailing slash
        if ($trailing && substr($current_dir, -1, 1) == "/") {
            $current_dir = (string)substr($current_dir, 0, -1);
        }

        return $current_dir;
    }

    /**
     * Gets the head part of the $source
     *
     * @param string $source the page HTML
     * @return string|null the <head>...</head> section, null when there is none
     **/
    function get_head($source) {
        if (preg_match("!<head([^>]+)?>.*?</head>!is", $source, $matches)) {
            return $matches[0];
        }
        return null;
    }

    /**
     * Removes old cache files
     *
     * Deletes every "_cmp_" file in the configured cache directories whose
     * name does not occur in $this->compressed_files_string.
     *
     * NOTE(review): only files produced for the current page are kept, so
     * pages sharing a cache directory appear to delete each other's files -
     * confirm this is intended.
     **/
    function do_cleanup() {
        //Get all directories; entries such as "page" or "show_timer" have no
        //cache directory and must not make us scan the document root
        $active_dirs = array();
        foreach ($this->options as $value) {
            if (is_array($value) && isset($value['cachedir'])) {
                $active_dirs[$_SERVER['DOCUMENT_ROOT'] . $value['cachedir']] = true;
            }
        }

        $in_use = isset($this->compressed_files_string) ? (string)$this->compressed_files_string : '';

        foreach (array_keys($active_dirs) as $path) {
            foreach ($this->get_files_in_dir($path) as $file) {
                if (strpos($file, "_cmp_") === false) {
                    continue;
                }
                if ($in_use !== '' && strpos($in_use, $file) !== false) {
                    continue;
                }
                if (is_file($path . "/" . $file)) {
                    unlink($path . "/" . $file);
                }
            }
        }
    }

    /**
     * Returns list of files in a directory
     *
     * @param string $path directory to read
     * @return array entry names (including "." and ".."), empty when the
     *               directory cannot be opened
     **/
    function get_files_in_dir($path) {
        $dirArray = array();

        // open this directory
        if (!is_dir($path)) {
            return $dirArray;
        }
        $myDirectory = opendir($path);
        if (!$myDirectory) {
            return $dirArray;
        }

        // get each entry; compare against false explicitly, an entry called
        // "0" would otherwise end the loop early
        while (false !== ($entryName = readdir($myDirectory))) {
            $dirArray[] = $entryName;
        }

        // close directory
        closedir($myDirectory);

        return $dirArray;
    }

    /**
     * Start script timing
     *
     * @return float current time in seconds
     **/
    function startTimer() {
        $parts = explode(" ", microtime());
        return (float)$parts[1] + (float)$parts[0];
    }

    /**
     * Return the time elapsed since $starttime
     *
     * @param float $starttime value returned by startTimer()
     * @return float seconds
     **/
    function returnTime($starttime) {
        $parts = explode(" ", microtime());
        $endtime = (float)$parts[1] + (float)$parts[0];
        return $endtime - $starttime;
    }

} // end class ?> getKeyword()}&yearFrom={$searchOption->getYearFrom()}&yearTo={$searchOption->getYearTo()}"; ?> ";
/***************************
 * Fetch the term frequency vector of each document in the current search
 * result and use it to compute, per term, its frequency within the documents
 * and the number of result documents in which it appears.
 * output: docFreq[term_id]: number of documents in the search result that contain the term
 ****************************/
$f1 = floatMicrotime();
// $pubmedID = '19890743'; // 19627182
$query = "SELECT r.docid, r.fvector FROM `cache_data` c,`raw_data` r WHERE c.docid=r.docid and c.cacheid=" . $cacheId; //. " and c.docid <= $pubmedID";
if($mysql_result = mysqli_query($mysqlLink, $query))
{
	$dCummTime = 0;
	while($row = mysqli_fetch_assoc($mysql_result))
	{
		$docId = $row["docid"];
		$numOfDocuments++;
		$curTerm = strtok($row["fvector"], ' :');
		$curFreq = strtok(' :');
		// compute term occurrence and document frequency
		// - add the term if it has not been seen yet, otherwise add curFreq to it
if(array_key_exists($curTerm, $termOccur)) { $termOccur[$curTerm] += $curFreq; $docFreq[$curTerm] += 1; } else { $termOccur[$curTerm] = $curFreq; $docFreq[$curTerm] = 1; } $dTime = floatMicrotime(); while($curFreq != '') { $curTerm = strtok(' :'); $curFreq = strtok(' :'); if(strlen($curTerm) != 0) { // term occurrence, document frequency 계산 // - 존재하지 않는 term이면 추가하고 있으면 curFreq를 더한다. if(array_key_exists($curTerm, $termOccur)) { $termOccur[$curTerm] += $curFreq; $docFreq[$curTerm] += 1; } else { $termOccur[$curTerm] = $curFreq; $docFreq[$curTerm] = 1; } } } $dCummTime += (floatMicrotime() - $dTime); } // end while(mysql) //echo "dCummTime: $dCummTime
"; $tempDocFreq = $docFreq; $docFreq = array(); foreach($tempDocFreq as $key => $val) { if($key != "null") { $docFreq[$key] = $val; } } // 각 텀을 빈도수 순으로 정렬합니다. 오름차순 정렬입니다. asort($docFreq); // print_r($docFreq); $globalDocFreq = array(); mysqli_free_result($mysql_result); unset($mysql_result); if(@$_REQUEST["debug"] == 1) echo 'f1_검색결과에서 문서별 단어개수, 단어등장 문서 개수 카운트:'. (floatMicrotime()-$f1).'
'; /*************************** * global idf 값 가져오는 부분 * output: $globalDocFreq[term_id]: 전체 문서 셋에서 해당 텀을 가지고 있는 문서 개수 ****************************/ $f2 = floatMicrotime(); ///////////////////////////// $query = "SELECT * FROM idf WHERE fnum in ("; $termIdList = array_keys($docFreq); for($i=0; $i"; if(isset($_REQUEST["globalIdfRatio"])) $threshold1_globalIdf_ratio = $_REQUEST["globalIdfRatio"]; //echo "GlobalIdfRatio: ". $threshold1_globalIdf_ratio . "
"; if(isset($_REQUEST["localGlobal"])) $local_global_threshold = $_REQUEST["localGlobal"]; //echo "localGlobal: ". $local_global_threshold . "
"; if(isset($_REQUEST["siblingThreshold"])) $siblingThreshold = $_REQUEST["siblingThreshold"]; //echo "Sibling Thresdhold: ". $siblingThreshold . "
"; if(isset($_REQUEST["parentThreshold"])) $parentThreshold = $_REQUEST["parentThreshold"]; //echo "Parent-Child Thredhold: ". $parentThreshold . "
"; // $threshold1_globalIdf = 18000000 * $threshold1_globalIdf_ratio; // ---끝--- 외부에서 Threadhold를 수정할 수 있게 합니다. 논문 실험용 - 김태훈 2010/07/10 ///////// // echo "ternId\tlocal_idf\tglobal_idf\n"; ////////////////////////////////////////////////////////// // 2010 DTMBIO 논문 때문에 특정 키워드일때 Threshold 값을 다르게 지정하도록 함 ---시작 --- // 2010/7/14 김태훈 if(@$_REQUEST["debug"] == 1) echo 'f2 전체 데이터베이스에서 해당 텀을 가지고있는 문서 개수 카운트:'. (floatMicrotime()-$f2).'
'; $f3 = floatMicrotime(); if($searchOption->getKeyword() === "mrsazzz" || $searchOption->getKeyword() === "mrsa strainzzz" || $searchOption->getKeyword() === "mrsa resistancezzz") { // 논문 구현때문에 하드 코딩한 부분 foreach($tempDocFreq as $key => $val) { $global = $globalDocFreq[$key]; $p_x = $val/$numOfDocuments; if($global < 18000000 * 0.05) // global idf < # of all documents * 0.05 만 써요 { if($p_x > 0.05) //sebset idf > # of result documents * 0.05 { $dfRatio = $val / $global; if($dfRatio < 0.05) { $docFreq[$key] = $val; $count++; } } } $termEntropy[$key] = number_format((-1) * $p_x * log($p_x, 10), 4); } } else // 아래가 정상적인 코드 { foreach($tempDocFreq as $key => $val) { if($termTypeMapping[$key] == "abstract") { $global = $globalDocFreq[$key] / $numOfGlobalDocuments; $p_x = $val/$numOfDocuments; if(@$_REQUEST["debug"] == 1) { if(true /*$global < $threshold1_globalIdf_ratio && $globalDocFreq[$key] > 100*/) { $dfRatio = $p_x / $global; if($p_x > $threshold1_localIdf_ratio && $dfRatio > $local_global_threshold) //subset idf > # of result documents * 0.05 { $docFreq[$key] = $val; $count++; } } } else { if($global < $threshold1_globalIdf_ratio && $globalDocFreq[$key] > 100) { $dfRatio = $p_x / $global; if($p_x > $threshold1_localIdf_ratio) //subset idf > # of result documents * 0.05 { $docFreq[$key] = $val; $count++; } else if($dfRatio > $local_global_threshold) // (local idf/# of local documents) / (global idf/# of global documents) < 9.2 { $docFreq[$key] = $val; $count++; } } } /* 플래시 안에서의 똥그라미 싸이즈 변경을 위한 새로운 공식 */ // $termEntropy[$key] = $p_x/$global; $termEntropy[$key] = number_format((-1) * $p_x * log($p_x,10), 4); } } /* 똥그라미 사이즈 변경을 위한 parameter */ $addPara = 0.1; $maxEntropy = max($termEntropy); $entropyList = $termEntropy; $termEntropy = array(); foreach($entropyList as $key => $val) { $termEntropy[$key] = ($entropyList[$key]/$maxEntropy) * (1 - $addPara) + $addPara; } } unset($tempDocFreq); unset($globalDocFreq); // -----끝 ----- 2010 DTMBIO 논문 때문에 특정 키워드일때 Threshold 값을 
다르게 지정하도록 함 ------ if($count<=0) { $endFlag = 1; } //echo "# of result documents : $numOfDocuments\n"; //echo "# of useful terms: $count\n"; //echo "# of terms: ".sizeof($termOccur)."\n"; unset($docId); unset($curTerm); unset($curFreq); } // end if // echo ""; unset($termOccur); if(@$_REQUEST["debug"] == 1) echo 'f3 concept 찾기:'. (floatMicrotime()-$f3).'
'; $f4 = floatMicrotime(); if($endFlag != 1) { $i = 0; $j = 0; $termIdx2Id = array(); //useful termTerm matrix 생성 init = 0 foreach($docFreq as $key1 => $val1) { $termIdx2Id[$i++] = $key1; foreach($docFreq as $key2 => $val2) { if(!isset($termTerm[$key1][$key2])) { $termTerm[$key1][$key2] = 0; } } } // 두 번째 db scan. 첫 번째 step에서 추출 된 term list에 속한 term들만 term-term table을 만든다(co-occurrence) // termterm 행렬도 DOCUMENT frequency 임 (텀프리퀀시 아님) $query = "SELECT r.docid, r.fvector FROM `cache_data` c,`raw_data` r WHERE c.docid=r.docid and c.cacheid=" . $cacheId ;//. " and r.docid <= $pubmedID"; if($mysql_result = mysqli_query($mysqlLink, $query)) { while($row = mysqli_fetch_assoc($mysql_result)) { $docId = $row["docid"]; $numTermsInThisDoc = 0; // 현재 row(document)의 term list 추출 $curDocTerm = array(); $curTerm = strtok($row["fvector"], ' :'); $curFreq = strtok(' :'); //useful termList에 속한 term만 curDocTerm에 저장 if(array_key_exists($curTerm, $docFreq)) { $curDocTerm[$numTermsInThisDoc++] = $curTerm; } while($curFreq != '') { $curTerm = strtok(' :'); $curFreq = strtok(' :'); if(strlen($curTerm) != 0) { if(array_key_exists($curTerm, $docFreq)) { $curDocTerm[$numTermsInThisDoc++] = $curTerm; } } }//end while````` // useful한 term list에 존재하는 term들간의 co-occurrence 계산 for($i=0; $i<$numTermsInThisDoc-1; $i++) { for($j=$i+1; $j<$numTermsInThisDoc; $j++) { $termTerm[$curDocTerm[$i]][$curDocTerm[$j]] += 1; } } }//end while unset($curDocTerm); }//end if if(@$_REQUEST["debug"] == 1) echo 'f4 현재 검색결과에서 텀과 텀간의 빈도수 행렬 생성:'. (floatMicrotime()-$f4).'
'; // term간의 relation 찾기 $numTerm = sizeof($docFreq); $RELATION_SIBLING = "Sibling"; $parentChildRelation = "Parent-Child"; $overTreeRelation = "Over Tree Parent-Child"; /***************************************** * term-term matrix와 df matrix를 보면서 확률 계산 - sibling/parent relation detect ****************************************/ // 비교를 위한 나이브 메소드 - POST // ------------------ if(@$_REQUEST["debug"] == 1) { $f5_naive = floatMicrotime(); $termRelation = array(); $termRalationSibling = array(); $whoIsMyRoot = array(); $termHavingRelation = array(); $myGraph = array(); for($i=0; $i<$numTerm; $i++) { $whoIsMyRoot[$termIdx2Id[$i]] = $termIdx2Id[$i]; // 이거 두개는 $edgeFromRoot[$termIdx2Id[$i]] = 0; // 나중에 저장된 relation으로 tree 구조 visualize 해줄 때 필요할거 같아서 $myGraph[$termIdx2Id[$i]] = $i; }// 각 term의 graph상 root값을 자기 자신으로 초기화, root로부터의 거리는 0 for($i=0; $i<$numTerm; $i++) { for($j=0; $j<$numTerm; $j++) { $proiGivenj = $projGiveni = 0; if($termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]]!=0) // term_i와 term_j가 같이 나오는 document가 있어요 { $proiGivenj = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$j]]; // P(Term_i | Term_j) $projGiveni = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$i]]; // P(Term_j | Term_i) if(($proiGivenj > $siblingThreshold) && ($projGiveni > $siblingThreshold)) // Term_i와 Term_j 는 sibling { $termRalationSibling[$termIdx2Id[$j]][$termIdx2Id[$i]] = $RELATION_SIBLING; $tempGraph = $myGraph[$termIdx2Id[$i]]; for($r=0; $r<$numTerm; $r++) { if($myGraph[$termIdx2Id[$r]]==$tempGraph) { $myGraph[$termIdx2Id[$r]]=$myGraph[$termIdx2Id[$j]]; } } $termHavingRelation[$termIdx2Id[$i]] = $whoIsMyRoot[$termIdx2Id[$i]]; $termHavingRelation[$termIdx2Id[$j]] = $whoIsMyRoot[$termIdx2Id[$j]]; } if($projGiveni > $parentThreshold && $proiGivenj < $parentThreshold) // Term_j는 Term_i의 parent { $termRelation[$termIdx2Id[$j]][$termIdx2Id[$i]] = $parentChildRelation; $tempRoot = $whoIsMyRoot[$termIdx2Id[$i]]; $tempEdge = $edgeFromRoot[$termIdx2Id[$i]]; 
$tempGraph = $myGraph[$termIdx2Id[$i]]; // 기존 term_i를 root로 저장했던 term들에 대해서 term_j의 root로 지정 for($r=0; $r<$numTerm; $r++) { if($whoIsMyRoot[$termIdx2Id[$r]] == $tempRoot) { $whoIsMyRoot[$termIdx2Id[$r]] = $whoIsMyRoot[$termIdx2Id[$j]]; $edgeFromRoot[$termIdx2Id[$r]] += $edgeFromRoot[$termIdx2Id[$j]] + 1; // root로 부터의 거리 재계산 } if($myGraph[$termIdx2Id[$r]]==$tempGraph) { $myGraph[$termIdx2Id[$r]]=$myGraph[$termIdx2Id[$j]]; } } $termHavingRelation[$termIdx2Id[$i]] = $whoIsMyRoot[$termIdx2Id[$i]]; $termHavingRelation[$termIdx2Id[$j]] = $whoIsMyRoot[$termIdx2Id[$j]]; // break; } } else // 같이 나오는 document가 하나도 없어요. relation 계산도 안함. { $proiGivenj = $projGiveni = 0; } } } // 중복된 관계 제거 foreach ($termRelation as $key1 => $subTermRelation1) { foreach ($subTermRelation1 as $key2 => $value1) { if($value1 == $parentChildRelation) { foreach ($termRelation as $key3 => $subTermRelation2) { foreach ($subTermRelation2 as $key4 => $value2) { if($value2 == $parentChildRelation && ($key2 == $key4 && $key1 != $key3)) { $termRelation[$key3][$key4] = null; } } } } } } foreach ($termRelation as $key1 => $subTermRelation1) { foreach ($subTermRelation1 as $key2 => $value1) { if(is_null($termRelation[$key1][$key2])) { unset($termRelation[$key1][$key2]); } } } if(@$_REQUEST["debug"] == 1) echo 'f5_POST:'. (floatMicrotime()-$f5_naive).'
'; } // KBS 저널 논문에 제안된 COMPACT 메소드 // ----------------------------- $f5_compact = floatMicrotime(); $termRelation = array(); $termRalationSibling = array(); $whoIsMyRoot = array(); $termHavingRelation = array(); $myGraph = array(); for($i=0; $i<$numTerm; $i++) { $whoIsMyRoot[$termIdx2Id[$i]] = $termIdx2Id[$i]; // 이거 두개는 $edgeFromRoot[$termIdx2Id[$i]] = 0; // 나중에 저장된 relation으로 tree 구조 visualize 해줄 때 필요할거 같아서 $myGraph[$termIdx2Id[$i]] = $i; }// 각 term의 graph상 root값을 자기 자신으로 초기화, root로부터의 거리는 0 for($i=0; $i<$numTerm-1; $i++) { for($j=$i+1; $j<$numTerm; $j++) { $proiGivenj = $projGiveni = 0; if($termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]]!=0) // term_i와 term_j가 같이 나오는 document가 있어요 { $proiGivenj = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$j]]; // P(Term_i | Term_j) $projGiveni = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$i]]; // P(Term_j | Term_i) if($projGiveni > $parentThreshold && $proiGivenj < $parentThreshold) // Term_j는 Term_i의 parent { // echo "** $proiGivenj({$termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]]}/{$docFreq[$termIdx2Id[$j]]}), ". // "$projGiveni({$termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]]}/{$docFreq[$termIdx2Id[$i]]})
"; $termRelation[$termIdx2Id[$j]][$termIdx2Id[$i]] = $parentChildRelation; $tempRoot = $whoIsMyRoot[$termIdx2Id[$i]]; $tempEdge = $edgeFromRoot[$termIdx2Id[$i]]; $tempGraph = $myGraph[$termIdx2Id[$i]]; for($r=0; $r<$numTerm; $r++) { if($whoIsMyRoot[$termIdx2Id[$r]] == $tempRoot) { $whoIsMyRoot[$termIdx2Id[$r]] = $whoIsMyRoot[$termIdx2Id[$j]]; $edgeFromRoot[$termIdx2Id[$r]] += $edgeFromRoot[$termIdx2Id[$j]] + 1; // root로 부터의 거리 재계산 } if($myGraph[$termIdx2Id[$r]]==$tempGraph) { $myGraph[$termIdx2Id[$r]]=$myGraph[$termIdx2Id[$j]]; } } $termHavingRelation[$termIdx2Id[$i]] = $whoIsMyRoot[$termIdx2Id[$i]]; $termHavingRelation[$termIdx2Id[$j]] = $whoIsMyRoot[$termIdx2Id[$j]]; break; } // else { // echo "$proiGivenj, $projGiveni
"; // } } else // 같이 나오는 document가 하나도 없어요. relation 계산도 안함. { $proiGivenj = $projGiveni = 0; } } } if(@$_REQUEST["debug"] == 1) echo 'f5_COMPACT_minimal:'. (floatMicrotime()-$f5_compact).'
'; $f6 = floatMicrotime(); $numTerm = sizeof($termIdx2Id); $candidateList = array(); // additional relation detect for($i=0; $i<$numTerm-1; $i++) { foreach($termIdx2Id as $key => $val) { $candidateList[$val] = true; } $candidateList[$termIdx2Id[$i]] = false; // find ancestors of $termIdx2Id[$i] $ancestorList = array(); $ancestorList = findAncestors($i, $ancestorList, $termIdx2Id, $termRelation, $parentChildRelation, $overTreeRelation, $numTerm); foreach($ancestorList as $key => $val) { $candidateList[$key] = false; } for($j=$i+1; $j<$numTerm; $j++) { $proiGivenj = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$j]]; // P(Term_i | Term_j) $projGiveni = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$i]]; // P(Term_j | Term_i) // $projGiveni = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$i]]; // $proiGivenj = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$j]]; if(($candidateList[$termIdx2Id[$j]] == true) && ($projGiveni > $parentThreshold) && ($proiGivenj < $parentThreshold)) // term_j가 속하는 tree와 relation이 존재하지 않고, threshold를 만족할 때 { $termRelation[$termIdx2Id[$j]][$termIdx2Id[$i]] = $overTreeRelation; $tempGraph = $myGraph[$termIdx2Id[$i]]; for($r=0; $r<$numTerm; $r++) { if($myGraph[$termIdx2Id[$r]]==$tempGraph) { $myGraph[$termIdx2Id[$r]]=$myGraph[$termIdx2Id[$j]]; } } $treeIndex[$myGraph[$termIdx2Id[$i]]] = 1; $termHavingRelation[$termIdx2Id[$i]] = $whoIsMyRoot[$termIdx2Id[$i]]; $termHavingRelation[$termIdx2Id[$j]] = $whoIsMyRoot[$termIdx2Id[$j]]; // find ancestors of $termIdx2Id[$j] $ancestorList = array(); $candidateList[$termIdx2Id[$j]] = false; $ancestorList = findAncestors($j, $ancestorList, $termIdx2Id, $termRelation, $parentChildRelation, $overTreeRelation, $numTerm); foreach($ancestorList as $key => $val) { $candidateList[$key] = false; } } } } $currentTime = floatMicrotime(); if(@$_REQUEST["debug"] == 1) echo 'f6_COMPACT_compact:'. ($currentTime-$f6).'
'; if(@$_REQUEST["debug"] == 1) echo 'f5+f6 = COMPACT:'. ($currentTime - $f5_compact).'
'; /* Pairwise로 sibling 찾기 */ $f6_sibling = floatMicrotime(); for($i=0; $i<$numTerm-1; $i++) { for($j=$i+1; $j<$numTerm; $j++) { $proiGivenj = $projGiveni = 0; if($termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]]!=0) // term_i와 term_j가 같이 나오는 document가 있어요 { $proiGivenj = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$j]]; // P(Term_i | Term_j) $projGiveni = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$i]]; // P(Term_j | Term_i) // $proiGivenj = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$j]]; // P(Term_i | Term_j) // $projGiveni = $termTerm[$termIdx2Id[$i]][$termIdx2Id[$j]] / $docFreq[$termIdx2Id[$i]]; // P(Term_j | Term_i) if(($proiGivenj > $siblingThreshold) && ($projGiveni > $siblingThreshold)) // Term_i와 Term_j 는 sibling { $termRalationSibling[$termIdx2Id[$j]][$termIdx2Id[$i]] = $RELATION_SIBLING; $tempGraph = $myGraph[$termIdx2Id[$i]]; for($r=0; $r<$numTerm; $r++) { if($myGraph[$termIdx2Id[$r]]==$tempGraph) { $myGraph[$termIdx2Id[$r]]=$myGraph[$termIdx2Id[$j]]; } } $termHavingRelation[$termIdx2Id[$i]] = $whoIsMyRoot[$termIdx2Id[$i]]; $termHavingRelation[$termIdx2Id[$j]] = $whoIsMyRoot[$termIdx2Id[$j]]; } } } } if(@$_REQUEST["debug"] == 1) echo 'f6_sibling detection:'. (floatMicrotime()-$f6_sibling).'
'; $f7 = floatMicrotime(); ////////////////////////////////////////// // term index와 실제 문자열과 맵핑 리스트 가져오기 // -------------------------- // 쿼리 생성: term index를 실제 문자열과 맵핑하는 배열 생성. $query = "SELECT fnum, termname, type FROM idf WHERE fnum in ("; $count = 0; foreach($termIdx2Id as $termId) { if($count != 0) $query .=","; $query .= "$termId"; $count++; } $query .=");"; $termNameMapping = array(); if($mysql_result = mysqli_query($mysqlLink, $query)) { while($row = mysqli_fetch_assoc($mysql_result)) { $termNameMapping[$row["fnum"]] = $row["termname"]; } } mysqli_free_result($mysql_result); unset($mysql_result); if(@$_REQUEST["debug"] == 1) echo 'f7 term index와 실제 문자열과 맵핑 리스트 가져오기:'. (floatMicrotime()-$f7).'
'; $f8 = floatMicrotime(); // Sibiling 처리 // ============================= // sibling 끼리 그룹으로 묶습니다. sibling 끼리는 undirected graph $siblingGroup = array(); $remainSiblingCheck = $termRalationSibling; // print_r($remainSiblingCheck); // echo "
"; while(sizeof($remainSiblingCheck)>0) { $currentSiblingGroup = array(); $remainKeys = array_keys($remainSiblingCheck); // 현재 텀과, 현재 텀의 sibling을 sibling 그룹에 추가 $currentSiblingGroup[] = $remainKeys[0]; $currentSiblingGroup = array_merge($currentSiblingGroup, array_keys($remainSiblingCheck[$remainKeys[0]])); unset($remainSiblingCheck[$remainKeys[0]]); // var_dump($remainSiblingCheck); $siblingGroup[] = $currentSiblingGroup; } // echo "
"; // print_r($siblingGroup); // echo "
"; $deleteIndex = array(); // 각각 그룹별로 겹치는게 있으면 하나의 그룹으로 통합 for($i=0;$i=0;$i--) { unset($siblingGroup[$deleteIndex[$i]]); } $siblingGroup = array_values($siblingGroup); // print_r($siblingGroup); // echo "
"; // sibling 관계에 자손들과 parents-child 관계인 경우 sibling을 가리키도록 변경 // var_dump($siblingGroup); for($i=0;$i"; print_r($siblingGroup[$i]); echo "
"; } for($j=0;$j{$termNameMapping[$termIdJ]}, {$termNameMapping[$termIdK]}
"; $overTreeDirection = 0; // var_dump($result_intersect); foreach($result_intersect as $key => $value) { // echo "__{$overTreeDirection},i:{$i},j:{$j},k:{$k},value:{$value},{$termRelation[$termIdJ][$value]}, {$termRelation[$termIdK][$value]}" ; if($termRelation[ $termIdJ ][$value] ==$overTreeRelation && (isset($termRelation[ $termIdK ][$value]) && $termRelation[ $termIdK ][$value] == $parentChildRelation)) { unset($termRelation[ $termIdJ ][$value]); // if(empty($termRelation[ $siblingGroup[$i][$j] ])) // { // unset($termRelation[ $siblingGroup[$i][$j] ]); // } $overTreeDirection |= 1; } if((isset($termRelation[ $termIdJ ][$value]) && $termRelation[ $termIdJ ][$value] == $parentChildRelation) && ($termRelation[ $termIdK ][$value] == $overTreeRelation )) { unset($termRelation[ $termIdK ][$value]); // if(empty($termRelation[ $siblingGroup[$i][$k] ])) // { // unset($termRelation[ $siblingGroup[$i][$k] ]); // } $overTreeDirection |= 2; } // echo "{$overTreeDirection}({$termNameMapping[$value]})
"; } // echo "
".$overTreeDirection; if($overTreeDirection & 1) { $termRelation[ $siblingGroup[$i][$j]][$siblingGroup[$i][$k]] = $overTreeRelation; // echo "->:{$termNameMapping[$siblingGroup[$i][$j]]}, {$termNameMapping[$siblingGroup[$i][$k]]}
"; } if($overTreeDirection & 2) { $termRelation[ $siblingGroup[$i][$k]][$siblingGroup[$i][$j]] = $overTreeRelation; // echo "<-:{$termNameMapping[$siblingGroup[$i][$j]]}, {$termNameMapping[$siblingGroup[$i][$k]]}
"; } } } } } } // print_r($termRelation); // print_r($termNameMapping); if(@$_REQUEST["debug"] == 1) echo 'f8:'. (floatMicrotime()-$f8).'
'; //소요 시간 출력 if(@$_REQUEST["debug"] == 1) echo "time: ".(floatMicrotime()-$startTime)."
"; $numOfParentChild = 0; $numOfSibling = 0; $xml = ""; $xml .= "getKeyword()}\" totalDoc=\"{$numOfDocuments}\" >\n"; // perent-child relation foreach($termRelation as $key1 => $val1) { foreach($val1 as $key2 => $val2) // $key1 : term1, $key2 : term2, $val2 : relation { if($val2==$parentChildRelation) { $xml .= "\n"; //parent 정보 출력 $xml .= ""; $xml .= $termNameMapping[$key1]."\n"; //child 정보 출력 $xml .= ""; $xml .= $termNameMapping[$key2]."\n"; $xml .= "\n"; $numOfParentChild++; } } } // sibling relation foreach($termRalationSibling as $key1 => $val1) { foreach($val1 as $key2 => $val2) // $key1 : term1, $key2 : term2, $val2 : relation { if($val2==$RELATION_SIBLING) { $xml .= "\n"; //sibling1 정보 출력 $xml .= ""; $xml .= $termNameMapping[$key1]."\n"; //sibling2 정보 출력 $xml .= ""; $xml .= $termNameMapping[$key2]."\n"; $xml .= "\n"; $numOfSibling++; } } } // over-tree relation foreach($termRelation as $key1 => $val1) { foreach($val1 as $key2 => $val2) // $key1 : term1, $key2 : term2, $val2 : relation { if($val2==$overTreeRelation) { $xml .= "\n"; //parent 정보 출력 $xml .= ""; $xml .= $termNameMapping[$key1]."\n"; //child 정보 출력 $xml .= ""; $xml .= $termNameMapping[$key2]."\n"; $xml .= "\n"; $numOfParentChild++; } } } // relation을 가지지 않은 node $xml .= "\n"; foreach($docFreq as $key => $val) { if(array_key_exists($key, $termHavingRelation)==false) { $xml .= ""; $xml .= $termNameMapping[$key]."\n"; } } $xml .= "\n"; $xml .= "\n"; if(@$_REQUEST["debug"] == 1) { echo "# of retrieved docs:".$numOfDocuments."
"; echo "# of concepts".sizeof($termHavingRelation)."
"; echo "# of parent-child relation".$numOfParentChild."
"; echo "# of sibling relation".$numOfSibling."
"; echo $xml; } $ontolotyMapBefore=' unsupported flash web browser"; echo ""; /* // term1 (term1의 root, term1이 속한 그래프번호) term2 (term2의 root, term2가 속한 그래프 번호) : term1과 term2의 relation foreach($termRelation as $key1 => $val1) { foreach($val1 as $key2 => $val2) { echo $key1."(R: ".$whoIsMyRoot[$key1].", G: ".$myGraph[$key1].", F: ".$docFreq[$key1]." ) ".$key2."(R: ".$whoIsMyRoot[$key2].", G: ".$myGraph[$key2].", F: ".$docFreq[$key2]." ) : ".$val2."
"; } } */ unset($termRelation); unset($termRalationSibling); unset($termIdx2Id); unset($termEntropy); unset($myGraph); unset($treeIndex); unset($whoIsMyRoot); unset($edgeFromRoot); unset($termTerm); unset($treeIndex); unset($docFreq); unset($termNameMapping); unset($termTypeMapping); } }// endif 내꺼 몽땅 ! return; /*********************************************************/ $tagCloudBefore=' getMeshCount(); $increment=-1; $what=$searchOption->getMeshCount(); if($mysql_result = mysqli_query($mysqlLink, $query)) { while($row = mysqli_fetch_assoc($mysql_result)) { $increment++; $meshNameList[] = $row["name"]; $meshName = $row["name"]; $count = $row["count(mesh.name)"]; $frequency[$increment]=$count; $nextKeyword = "{$searchOption->getKeyword()} AND {$meshName}[MeSH Major Topic]"; // .urlencode(." and ($meshName[MeSH Major Topic])") // $linkURL = "http://dm.postech.ac.kr/refmed/?keyword=({$searchOption->getKeyword()}) AND ({$meshName}[MeSH Major Topic])"; $linkURL = "http://dm.postech.ac.kr/refmed/?keyword=". urlencode($nextKeyword); //$tagcloud = $tagcloud . "\n"; $size = 17-12/$what*$increment; $tagcloud = $tagcloud . "\n"; /*if($increment<(5)){ $tagcloud = $tagcloud . "\n"; } else if($increment>=(5)){ $tagcloud = $tagcloud . "\n"; } */ } mysqli_free_result($mysql_result); unset($mysql_result); } echo urlencode("$tagcloud")."\" />

$tagcloud

"; ?>
"; for($i=0;$i<$searchOption->getMeshCount();$i++){ echo"$i -> $meshNameList[$i], the number of occurence: $frequency[$i]"; echo"
"; } for($i=0;$i<$searchOption->getMeshCount();$i++){ $frequency[$i]=pow($frequency[$i]/$localTotalCount, 2)*pow(-($frequency[$i]/$localTotalCount)*log($frequency[$i]/$localTotalCount, 10), 2); echo"$i -> $meshNameList[$i], the number of occurence: $frequency[$i]"; echo"
"; } */ }
Warning: session_start(): Cannot send session cookie - headers already sent by (output started at /var/www/html/refmed/phpspeedy/class.compressor.php:722) in /var/www/html/refmed/index.php on line 43

Warning: session_start(): Cannot send session cache limiter - headers already sent (output started at /var/www/html/refmed/phpspeedy/class.compressor.php:722) in /var/www/html/refmed/index.php on line 43

Fatal error: Class 'compressor' not found in /var/www/html/refmed/index.php on line 50