如何将全文检索中的“干扰词”去除

在页面中包含以下脚本,然后调用 remove_noise_word() 即可:

 1<script language="javascript">   
 2<!--   
 /**
  * Noise-word (stop-word) list scanned by is_ch_noise_word().
  *
  * Holds a few punctuation marks, the ten digits, the single Latin letters,
  * common English words and common single Chinese characters. The English
  * entries are lower-case because is_ch_noise_word() lower-cases a keyword
  * before comparing it against this list.
  *
  * Declared with `var` so that it is an explicit global; the earlier bare
  * assignment to an undeclared name throws a ReferenceError in strict mode.
  */
 var noise_word_list_ch = [
   "?","about","$","1","2","3","4","5","6","7","8","9","0","_",
   "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
   "p","q","r","s","t","u","v","w","x","y","z","after","all","also",
   "an","and","another","any","are","as","at","be","because","been",
   "before","being","between","both","but","by","came","can","come",
   "could","did","do","each","for","from","get","got","had","has",
   "have","he","her","here","him","himself","his","how","if","in","into",
   "is","it","like","make","many","me","might","more","most","much","must",
   "my","never","now","of","on","only","or","other","our","out","over","said",
   "same","see","should","since","some","still","such","take","than","that",
   "the","their","them","then","there","these","they","this","those","through",
   "to","too","under","up","very","was","way","we","well","were","what","where",
   "which","while","who","with","would","you","your",
   "的","一","不","在","人","有","是","为","以","于","上","他","而","后","之","来",
   "及","了","因","下","可","到","由","这","与","也","此","但","并","个","其","已",
   "无","小","我","们","起","最","再","今","去","好","只","又","或","很","亦","某",
   "把","那","你","乃","它"
 ];
20
/**
 * Removes leading and trailing space characters (U+0020) from a value.
 *
 * Only the plain space is stripped; tabs, line breaks and full-width spaces
 * are left in place.
 *
 * @param {*} inputVal Value to trim. It is converted with toString() first;
 *     null and undefined are treated as an empty string.
 * @returns {string} The string form of inputVal without surrounding spaces.
 */
function trim_str_key(inputVal) {
  if (inputVal === null || inputVal === undefined) {
    return "";
  }

  // Kept local: an undeclared assignment here would leak a global variable
  // and throws a ReferenceError in strict mode.
  var text = inputVal.toString();
  var start = 0;
  var end = text.length;

  // Skip spaces on the left.
  while (start < end && text.charAt(start) === " ") {
    start++;
  }
  // Skip spaces on the right.
  while (end > start && text.charAt(end - 1) === " ") {
    end--;
  }

  // One slice at the end instead of re-slicing once per removed space.
  return text.substring(start, end);
}
36
/**
 * Tells whether a keyword is an entry of noise_word_list_ch.
 *
 * The keyword is trimmed with trim_str_key() and lower-cased, then compared
 * for exact equality against every entry of the list.
 *
 * @param {*} str_key Keyword to look up.
 * @returns {boolean} true if the normalized keyword is in the list,
 *     false otherwise.
 */
function is_ch_noise_word(str_key) {
  var key_word = trim_str_key(str_key).toLowerCase();
  var listlength = noise_word_list_ch.length;

  // The counter is declared locally; as an undeclared name it would be an
  // implicit global (ReferenceError in strict mode).
  for (var i = 0; i < listlength; i++) {
    if (noise_word_list_ch[i] === key_word) {
      return true;
    }
  }
  return false;
}
50
/**
 * Removes noise words from a space-separated search string.
 *
 * The input is trimmed with trim_str_key() and split on the space character.
 * Every token for which is_ch_noise_word() returns true is dropped, and the
 * remaining tokens are joined back with single spaces.
 *
 * @param {*} str_source Search string typed by the user.
 * @returns {string} The keywords that are not noise words, separated by one
 *     space each; an empty string if nothing is left.
 */
function remove_noise_word(str_source) {
  var tokens = trim_str_key(str_source).split(" ");
  var kept = [];

  for (var i = 0; i < tokens.length; i++) {
    var token = tokens[i];

    // Consecutive spaces yield empty tokens; skip them so the result never
    // contains doubled spaces.
    if (token === "") {
      continue;
    }

    // Every token is checked, including the last one, so a trailing noise
    // word is removed as well.
    if (!is_ch_noise_word(token)) {
      kept.push(token);
    }
  }

  return kept.join(" ");
}
80
81//下面是一个测试   
82//var abc = "av n";   
83//var nnnn = remove_noise_word(abc);   
84//alert(nnnn);   
85//-->   
86</script>
Published At
Categories with 数据库类
Tagged with
comments powered by Disqus