Analyzer, word-segmentation classes

package OCI.ME.analysis.C;

import java.io.IOException;
import java.util.List;

// Author, copyright holder: 罗瑶光 (Luo Yaoguang), 浏阳 (Liuyang).
public interface CogsBinaryForest_A {

    void IV_() throws IOException;

    void IV_Mixed() throws IOException;

    void IV_All() throws IOException; // an extended multilingual initialization example is provided

    List parserString(String input);

    void I_FixWords(int charPosition, String inputString, StringBuilder[] fixWords);

    String[] parserEnglishString(String englishString);

    List parserMixedString(String mixedString);
}

// Once the overall design is settled, this will be split out into its own module.
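A minimal usage sketch of the interface, under two assumptions not confirmed by the listing: CogsBinaryForest_AE (shown below) exposes a no-argument constructor, and IV_() must run once to load the segmentation model before parserString is called.

import java.io.IOException;
import java.util.List;

import OCI.ME.analysis.C.CogsBinaryForest_A;
import OCI.ME.analysis.E.CogsBinaryForest_AE;

public class AnalyzerDemo {
    public static void main(String[] args) throws IOException {
        // Assumption: no-arg constructor; IV_() loads the model before parsing.
        CogsBinaryForest_A analyzer = new CogsBinaryForest_AE();
        analyzer.IV_();
        List words = analyzer.parserString("今天天气很好"); // "The weather is nice today"
        System.out.println(words);
    }
}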
package OCI.ME.analysis.E;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import AVQ.ASQ.OVQ.OSQ.VSQ.obj.FMHMMNode;
import AVQ.ASQ.OVQ.OSQ.VSQ.obj.WordFrequency;
import ME.utils.WordFrequencyUtil;
import OCI.AVC.SUQ.SVQ.MPC.fhmm.C.EmotionMap;
import OCI.ME.analysis.C.CogsBinaryForest_A;
import SVQ.stable.StablePOS;

// Author, copyright holder: 罗瑶光 (Luo Yaoguang), 浏阳 (Liuyang).
public class CogsBinaryForest_AE extends BinaryForest_AE implements CogsBinaryForest_A {

    // Fields such as fHMMList, neroController, nlpController, posController,
    // quick6DLuoYaoguangSort and emotionMap are presumably inherited from
    // BinaryForest_AE, which is not shown in this listing.

    public Map<String, WordFrequency> parserStringByReturnFrequencyMap(String inputString) {
        Map wordsForest = fHMMList.getPosCnToCn();
        Map<String, WordFrequency> outputList = new ConcurrentHashMap<>();
        Map forestRoots = fHMMList.getMap();
        int inputStringLength = inputString.length();
        int forestDepth = StablePOS.INT_ZERO;
        int countInputStringLength = StablePOS.INT_ZERO;
        // fixWords[0] buffers the pending word; fixWords[1] is scratch space for the fix-up helpers.
        StringBuilder[] fixWords = new StringBuilder[StablePOS.INT_TWO];
        fixWords[StablePOS.INT_ZERO] = new StringBuilder();
        fixWords[StablePOS.INT_ONE] = new StringBuilder();
        StringBuilder stringBuilder = new StringBuilder();
        int find = StablePOS.INT_ZERO;
        // Scan the input, advancing by the length of the word matched in the previous round.
        for (int charPosition = StablePOS.INT_ZERO; charPosition < inputStringLength;
                charPosition += (countInputStringLength == StablePOS.INT_ZERO
                        ? StablePOS.INT_ONE : countInputStringLength)) {
            // ASCII character: accumulate a Latin-script run instead of doing a forest lookup.
            if (inputString.charAt(charPosition) < StablePOS.INT_ONE_TWO_EIGHT) {
                if (fixWords[StablePOS.INT_ZERO].length() > StablePOS.INT_ZERO) {
                    if (fixWords[StablePOS.INT_ZERO].charAt(
                            fixWords[StablePOS.INT_ZERO].length() - StablePOS.INT_ONE)
                            < StablePOS.INT_ONE_TWO_EIGHT) {
                        fixWords[StablePOS.INT_ZERO].append(inputString.charAt(charPosition));
                        countInputStringLength = StablePOS.INT_ONE;
                        find = StablePOS.INT_ONE;
                        continue;
                    }
                    fixWords[StablePOS.INT_ZERO].delete(StablePOS.INT_ZERO
                            , fixWords[StablePOS.INT_ZERO].length());
                }
                find = StablePOS.INT_ONE;
                fixWords[StablePOS.INT_ZERO].append(inputString.charAt(charPosition));
                countInputStringLength = StablePOS.INT_ONE;
                continue;
            }
            // A non-ASCII character ends the pending Latin run: flush it into the map.
            if (find == StablePOS.INT_ONE) {
                find = StablePOS.INT_ZERO;
                WordFrequencyUtil.WordFrequencyFindCheck(outputList, fixWords);
            }
            stringBuilder.delete(StablePOS.INT_ZERO, stringBuilder.length());
            // Longest-match lookup in the binary word forest, starting at the current character.
            stringBuilder = neroController.getBinaryForestRecurWordOneTime(
                    stringBuilder.append(inputString.charAt(charPosition)), inputString,
                    charPosition, inputStringLength, forestRoots, forestDepth,
                    charPosition + StablePOS.INT_ONE);
            String countWordNode = stringBuilder.toString();
            int compare = countInputStringLength = countWordNode.length();
            if (compare == StablePOS.INT_ONE) {
                WordFrequencyUtil.WordFrequencyCompareCheck(outputList, fixWords, countWordNode);
                fixWords[StablePOS.INT_ZERO].delete(StablePOS.INT_ZERO
                        , fixWords[StablePOS.INT_ZERO].length());
                fixWords[StablePOS.INT_ZERO].append(countWordNode);
                continue;
            }
            if (compare == StablePOS.INT_TWO) {
                // Two-character match: run the slang and part-of-speech checks.
                countInputStringLength = nlpController.doSlangPartAndPOSCheckForTwoCharForMap(
                        countInputStringLength, outputList, stringBuilder, wordsForest,
                        fixWords, posController);
                continue;
            }
            if (compare == StablePOS.INT_THREE) {
                // Three-character match: POS plus emotion-model checks.
                I_FixWords(charPosition, inputString, fixWords);
                countInputStringLength = nlpController.doPOSAndEMMCheckOfThreeForMap(
                        countInputStringLength, outputList, wordsForest, stringBuilder,
                        fixWords, posController);
                continue;
            }
            if (compare == StablePOS.INT_FOUR) {
                // Four-character match: idiom/slang check.
                I_FixWords(charPosition, inputString, fixWords);
                countInputStringLength = nlpController.doSlangCheckForMap(countInputStringLength,
                        outputList, stringBuilder, wordsForest, fixWords, posController);
            }
        }
        return outputList;
    }

    public Map<String, WordFrequency> getWordFrequencyMap(List<String> sets) throws IOException {
        Map<String, WordFrequency> map = new ConcurrentHashMap<>();
        Iterator<String> iterator = sets.iterator();
        while (iterator.hasNext()) {
            String setOfi = iterator.next();
            if (map.containsKey(setOfi)) {
                WordFrequency wordFrequency = map.get(setOfi);
                wordFrequency.I_Frequency(wordFrequency.getFrequency() + StablePOS.INT_ONE);
                map.put(setOfi, wordFrequency);
                continue;
            }
            WordFrequency wordFrequency = new WordFrequency();
            wordFrequency.I_Frequency(StablePOS.INT_ONE);
            wordFrequency.I_Word(setOfi);
            map.put(setOfi, wordFrequency);
        }
        return map;
    }
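    // Illustrative sketch, not part of the original class: the same accumulation
    // as getWordFrequencyMap above, expressed with Map.merge. It relies only on
    // the WordFrequency accessors already used in this file (I_Word, I_Frequency,
    // getFrequency).
    private static Map<String, WordFrequency> countWordsSketch(List<String> sets) {
        Map<String, WordFrequency> map = new ConcurrentHashMap<>();
        for (String word : sets) {
            WordFrequency fresh = new WordFrequency();
            fresh.I_Frequency(StablePOS.INT_ONE);
            fresh.I_Word(word);
            // merge keeps an existing entry and bumps its count, else inserts fresh.
            map.merge(word, fresh, (existing, ignored) -> {
                existing.I_Frequency(existing.getFrequency() + StablePOS.INT_ONE);
                return existing;
            });
        }
        return map;
    }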
    // The computation functions will later be moved out of the DNA meta-primitive group into RNA.
    public List sortWordFrequencyMap(Map map) throws IOException {
        List list = quick6DLuoYaoguangSort.frequencyWordMapToList(map);
        quick6DLuoYaoguangSort.quick6DLuoYaoGuangSortWordFrequency(list, StablePOS.INT_ZERO
                , list.size() - StablePOS.INT_ONE);
        return list;
    }

    public Map getWordFrequencyByReturnSortMap(List<String> sets) throws IOException {
        return sortWordFrequencyMapToSortMap(getWordFrequencyMap(sets));
    }

    public Map sortWordFrequencyMapToUnsortMap(Map map) {
        return quick6DLuoYaoguangSort.frequencyWordMapToMap(map);
    }

    public Map sortWordFrequencyMapToSortMap(Map map) {
        Map listMap = quick6DLuoYaoguangSort.frequencyWordMapToMap(map);
        quick6DLuoYaoguangSort.quick6DLuoYaoGuangSortWordFrequency(listMap, StablePOS.INT_ZERO
                , listMap.size() - StablePOS.INT_ONE);
        return listMap;
    }
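    // Illustrative sketch, not part of the original class: one way to consume the
    // sorting pipeline above. getWord() is an assumed getter paired with the I_Word
    // setter used elsewhere in this file; the sort order produced by
    // quick6DLuoYaoGuangSortWordFrequency is not documented in this listing.
    private void printWordFrequenciesSketch(List<String> segmentedWords) throws IOException {
        List sorted = getWordFrequency(segmentedWords);
        for (Object entry : sorted) {
            WordFrequency wordFrequency = (WordFrequency) entry;
            System.out.println(wordFrequency.getWord() // assumption: getWord() exists
                    + "\t" + wordFrequency.getFrequency());
        }
    }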
    public Map<String, WordFrequency> parserMixStringByReturnFrequencyMap(String mixedString) {
        // Append a sentinel space so a trailing Latin-script run is flushed on the last pass.
        mixedString += StablePOS.SPACE_STRING;
        Map wordsForest = fHMMList.getPosCnToCn();
        Map<String, WordFrequency> outputList = new ConcurrentHashMap<>();
        Map forestRoots = fHMMList.getMap();
        int inputStringLength = mixedString.length();
        int forestDepth = StablePOS.INT_ZERO;
        int countInputStringLength = StablePOS.INT_ZERO;
        StringBuilder[] fixWords = new StringBuilder[StablePOS.INT_TWO];
        fixWords[StablePOS.INT_ZERO] = new StringBuilder();
        fixWords[StablePOS.INT_ONE] = new StringBuilder();
        StringBuilder stringBuilder = new StringBuilder();
        int find = StablePOS.INT_ZERO;
        for (int charPosition = StablePOS.INT_ZERO; charPosition < inputStringLength;
                charPosition += (countInputStringLength == StablePOS.INT_ZERO
                        ? StablePOS.INT_ONE : countInputStringLength)) {
            // Garbled-character (乱码) guard: buffer ASCII characters short of the
            // sentinel as a Latin-script run.
            if (mixedString.charAt(charPosition) < StablePOS.INT_ONE_TWO_EIGHT
                    && charPosition < mixedString.length() - StablePOS.INT_ONE) {
                if (find == StablePOS.INT_ZERO) {
                    fixWords[StablePOS.INT_ZERO].delete(StablePOS.INT_ZERO
                            , fixWords[StablePOS.INT_ZERO].length());
                }
                fixWords[StablePOS.INT_ZERO].append(mixedString.charAt(charPosition));
                countInputStringLength = StablePOS.INT_ONE;
                find = StablePOS.INT_ONE;
                continue;
            }
            // End of a Latin run: split it into English words and count each one.
            if (find == StablePOS.INT_ONE) {
                find = StablePOS.INT_ZERO;
                Iterator<String> it = fHMMList.englishStringToWordsList(
                        fixWords[StablePOS.INT_ZERO].toString()).iterator();
                while (it.hasNext()) {
                    String temp = it.next();
                    if (outputList.containsKey(temp)) {
                        WordFrequency wordFrequency = outputList.get(temp);
                        wordFrequency.I_Frequency(wordFrequency.getFrequency() + StablePOS.INT_ONE);
                        outputList.put(temp, wordFrequency);
                    } else {
                        WordFrequency wordFrequency = new WordFrequency();
                        wordFrequency.I_Frequency(StablePOS.INT_ONE);
                        wordFrequency.I_Word(temp);
                        outputList.put(temp, wordFrequency);
                    }
                }
                fixWords[StablePOS.INT_ZERO].delete(StablePOS.INT_ZERO
                        , fixWords[StablePOS.INT_ZERO].length());
            }
            stringBuilder.delete(StablePOS.INT_ZERO, stringBuilder.length());
            stringBuilder = neroController.getBinaryForestRecurWordOneTime(
                    stringBuilder.append(mixedString.charAt(charPosition)), mixedString,
                    charPosition, inputStringLength, forestRoots, forestDepth,
                    charPosition + StablePOS.INT_ONE);
            String countWordNode = stringBuilder.toString();
            int compare = countInputStringLength = countWordNode.length();
            if (compare == StablePOS.INT_TWO) {
                countInputStringLength = nlpController.doSlangPartAndPOSCheckForTwoCharForMap(
                        countInputStringLength, outputList, stringBuilder, wordsForest,
                        fixWords, posController);
                continue;
            }
            if (compare == StablePOS.INT_THREE) {
                I_FixWords(charPosition, mixedString, fixWords);
                countInputStringLength = nlpController.doPOSAndEMMCheckOfThreeForMap(
                        countInputStringLength, outputList, wordsForest, stringBuilder,
                        fixWords, posController);
                continue;
            }
            if (compare == StablePOS.INT_ONE) {
                if (outputList.containsKey(countWordNode)) {
                    WordFrequency wordFrequency = outputList.get(countWordNode);
                    wordFrequency.I_Frequency(wordFrequency.getFrequency() + StablePOS.INT_ONE);
                    outputList.put(countWordNode, wordFrequency);
                } else {
                    WordFrequency wordFrequency = new WordFrequency();
                    wordFrequency.I_Frequency(StablePOS.INT_ONE);
                    // Store the matched word itself so the entry matches its map key.
                    wordFrequency.I_Word(countWordNode);
                    outputList.put(countWordNode, wordFrequency);
                }
                fixWords[StablePOS.INT_ZERO].delete(StablePOS.INT_ZERO
                        , fixWords[StablePOS.INT_ZERO].length());
                fixWords[StablePOS.INT_ZERO].append(countWordNode);
                continue;
            }
            if (compare == StablePOS.INT_FOUR) {
                I_FixWords(charPosition, mixedString, fixWords);
                countInputStringLength = nlpController.doSlangCheckForMap(countInputStringLength,
                        outputList, stringBuilder, wordsForest, fixWords, posController);
            }
        }
        return outputList;
    }

    public List getWordFrequency(List<String> sets) throws IOException {
        return sortWordFrequencyMap(getWordFrequencyMap(sets));
    }

    public EmotionMap getEmotionMap() {
        return this.emotionMap;
    }
}
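An end-to-end sketch for mixed Chinese/English input, again assuming a no-argument constructor and that IV_Mixed() is the initializer matching parserMixStringByReturnFrequencyMap; the sample sentence and the printed output are illustrative only.

import java.io.IOException;
import java.util.Map;

import OCI.ME.analysis.E.CogsBinaryForest_AE;

public class MixedParserDemo {
    public static void main(String[] args) throws IOException {
        // Assumption: IV_Mixed() loads the mixed-language model used by the parser.
        CogsBinaryForest_AE analyzer = new CogsBinaryForest_AE();
        analyzer.IV_Mixed();
        // Word -> WordFrequency map for a sentence mixing Chinese and English.
        Map frequencies = analyzer.parserMixStringByReturnFrequencyMap("我正在学习Java和NLP技术");
        System.out.println(frequencies);
    }
}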