// AnalyzerImp: word-segmentation class.
package OEI.ME.analysis.E;

import java.io.IOException;
import java.util.List;
import java.util.Map;
//import java.util.concurrent.ConcurrentHashMap;

import AVQ.ASQ.OVQ.OSQ.VSQ.obj.FMHMMNode;
//import AVQ.ASQ.OVQ.OSQ.VSQ.obj.WordFrequency;
import SVQ.stable.StablePOS;
//import ME.utils.WordFrequencyUtil;
import OCI.AVC.SUQ.SVQ.MPC.fhmm.C.EmotionMap;
import OCI.ME.analysis.C.A;
import OCI.ME.liner.C.Quick6DLuoYaoguangSort;
import OCI.ME.nero.C.NERO_C_OneTime;
//import OCI.ME.nlp.C.NLP_C;
//import OCI.ME.pos.C.POS_C;
//import OCI.ME.pos.C.Pos_C_XCDX;
import OCI.ME.pos.C.Pos_C_XCDX_P;
import OCI.SVQ.MPC.fhmm.C.FHMMList;
import OEI.AVC.SUQ.SVQ.MPC.fhmm.E.EmotionMap_E;
import OEI.ME.liner.E.Quick6DLuoYaoguangSort3DMap_E;
import OEI.ME.nero.E.NERO_C_OneTime_E;
//import OEI.ME.nlp.E.NLP_CE;
import OEI.ME.nlp.E.Nlp_CE_XCDX_S;
//import OEI.ME.pos.E.POS_CE;
//import OEI.ME.pos.E.Pos_CE_XCDX;
import OEI.ME.pos.E.Pos_CE_XCDX_P;
import OEI.SVQ.MPC.fhmm.E.FMHMMListOneTime_E;
import java.util.Iterator;
import java.util.LinkedList;
import PEQ.AMV.ECS.test.SensingTest;

// Author and copyright holder: Luo Yaoguang, Liuyang
/**
 * Word-segmentation analyzer.
 *
 * <p>Holds the dictionary index ({@link FHMMList}), the controllers consulted
 * while segmenting, and the lookup maps obtained from the index. One of the
 * {@code IV_*} methods must be called before any {@code parser*} method,
 * because the parsers dereference fields that only those methods assign.
 *
 * <p>NOTE(review): the generic type arguments of the {@code Map} fields were
 * lost in the copy of the source this revision was made from. They are restored
 * here as {@code Long}-keyed {@code FMHMMNode} maps and a {@code String}-to-
 * {@code String} map; confirm against the return types declared by
 * {@code FHMMList} before merging.
 */
public class AE implements A {
	protected FHMMList fHMMList;
	protected NERO_C_OneTime neroController;
	protected Nlp_CE_XCDX_S nlpController;
	protected Pos_C_XCDX_P posController;
	protected Quick6DLuoYaoguangSort quick6DLuoYaoguangSort;
	protected Map<Long, FMHMMNode> forestRoots;
	protected Map<Integer, Map<Long, FMHMMNode>> wordsForests;
	protected Map<Long, FMHMMNode>[] forestsRoots;
	protected Map<String, String> wordsForest;
	protected EmotionMap emotionMap;
	protected SensingTest sensingTest;

	/**
	 * Initialises the analyzer from the base index ({@code fHMMList.index()}).
	 * Does not initialise {@code emotionMap} or {@code sensingTest}.
	 *
	 * @throws IOException propagated from the index or controller setup
	 */
	public void IV_() throws IOException {
		this.fHMMList = new FMHMMListOneTime_E();
		fHMMList.index();
		indexCommonTables();
		initControllersAndForests();
	}

	/**
	 * Initialises the analyzer from the mixed index
	 * ({@code fHMMList.indexMixed()}), including the extra translation tables,
	 * the emotion maps and the sensing test.
	 *
	 * @throws IOException propagated from the index or controller setup
	 */
	public void IV_Mixed() throws IOException {
		this.fHMMList = new FMHMMListOneTime_E();
		fHMMList.indexMixed();
		indexCommonTables();
		indexExtendedTables();
		initControllersAndForests();
		initEmotionAndSensing();
	}

	/**
	 * Initialises the analyzer from the full index
	 * ({@code fHMMList.indexAll()}), including the extra translation tables,
	 * the emotion maps and the sensing test.
	 *
	 * @throws IOException propagated from the index or controller setup
	 */
	public void IV_All() throws IOException {
		this.fHMMList = new FMHMMListOneTime_E();
		fHMMList.indexAll();
		indexCommonTables();
		indexExtendedTables();
		initControllersAndForests();
		initEmotionAndSensing();
	}

	/** Builds the six tables that every initialisation variant indexes. */
	private void indexCommonTables() throws IOException {
		fHMMList.indexPosEnToCn();
		fHMMList.indexPosEnToEn();
		fHMMList.indexEnToCn();
		fHMMList.indexCnToEn();
		fHMMList.indexFullEnToCn();
		fHMMList.indexFullCnToEn();
	}

	/** Builds the additional tables indexed only by the Mixed and All variants. */
	private void indexExtendedTables() throws IOException {
		fHMMList.indexFullCnToPy();
		fHMMList.indexFullCnToKo();
		fHMMList.indexFullCnToJp();
		fHMMList.indexFullCnToTt();
		fHMMList.indexFullCnToRs();
		fHMMList.indexFullCnToAb();
	}

	/** Creates the controllers and caches the lookup maps exposed by the index. */
	private void initControllersAndForests() throws IOException {
		neroController = new NERO_C_OneTime_E();
		nlpController = new Nlp_CE_XCDX_S();
		posController = new Pos_CE_XCDX_P();
		quick6DLuoYaoguangSort = new Quick6DLuoYaoguangSort3DMap_E();
		forestRoots = fHMMList.getMap();
		forestsRoots = fHMMList.getMaps();
		wordsForest = fHMMList.getPosCnToCn();
		wordsForests = fHMMList.getWordsForests();
	}

	/** Creates and initialises the emotion maps, then the sensing test. */
	private void initEmotionAndSensing() throws IOException {
		emotionMap = new EmotionMap_E();
		emotionMap.IV_MotivationMap();
		emotionMap.IV_NegativeMap();
		emotionMap.IV_PositiveMap();
		emotionMap.IV_TrendingMap();
		emotionMap.IV_PredictionMap();
		sensingTest = new SensingTest();
	}

	/**
	 * Segments text that mixes dictionary words with runs of characters whose
	 * code is below {@code StablePOS.INT_ONE_TWO_EIGHT}.
	 *
	 * <p>Such a run is buffered, then split by
	 * {@code fHMMList.englishStringToWordsList}; consecutive pieces contained
	 * in {@code StablePOS.NUMBERS} are merged into a single token. Every other
	 * position is handled by the dictionary lookup shared with
	 * {@link #parserString(String)}.
	 *
	 * <p>NOTE(review): the loop header and the run-accumulation branch were
	 * missing from the copy of the source this revision was made from. They
	 * are reconstructed so that the character at the last index (the appended
	 * {@code StablePOS.SPACE_STRING_DISTINCTION}) always falls through and
	 * flushes a pending run; confirm against the upstream implementation.
	 *
	 * @param mixedString text to segment
	 * @return the tokens in input order
	 */
	public List<String> parserMixedString(String mixedString) {
		mixedString += StablePOS.SPACE_STRING_DISTINCTION;
		int inputStringLength = mixedString.length();
		List<String> outputList = new LinkedList<>();
		int forestDepth = StablePOS.INT_ZERO;
		int countInputStringLength;
		StringBuilder[] fixWords = newFixWords();
		StringBuilder stringBuilder = new StringBuilder();
		int find = StablePOS.INT_ZERO;
		for (int charPosition = StablePOS.INT_ZERO; charPosition < inputStringLength;
				charPosition += (countInputStringLength == StablePOS.INT_ZERO
						? StablePOS.INT_ONE : countInputStringLength)) {
			if (mixedString.charAt(charPosition) < StablePOS.INT_ONE_TWO_EIGHT
					&& charPosition < inputStringLength - StablePOS.INT_ONE) {
				if (find == StablePOS.INT_ZERO) {
					// A new run starts: drop whatever the buffer held before.
					reset(fixWords[StablePOS.INT_ZERO]);
				}
				fixWords[StablePOS.INT_ZERO].append(mixedString.charAt(charPosition));
				countInputStringLength = StablePOS.INT_ONE;
				find = StablePOS.INT_ONE;
				continue;
			}
			if (find == StablePOS.INT_ONE) {
				find = StablePOS.INT_ZERO;
				addRunTokens(fixWords[StablePOS.INT_ZERO].toString(), outputList);
				reset(fixWords[StablePOS.INT_ZERO]);
			}
			reset(stringBuilder);
			stringBuilder = neroController.getBinaryForestRecurWordOneTime(
					stringBuilder.append(mixedString.charAt(charPosition)), mixedString,
					charPosition, inputStringLength, forestRoots, forestDepth,
					charPosition + StablePOS.INT_ONE);
			countInputStringLength = dispatchByWordLength(stringBuilder, mixedString,
					charPosition, outputList, fixWords);
		}
		return outputList;
	}

	/**
	 * Segments text using the dictionary lookup; a run of characters whose
	 * code is below {@code StablePOS.INT_ONE_TWO_EIGHT} is emitted as one
	 * unsplit token.
	 *
	 * <p>Unlike the previous revision, a run that ends the input is added to
	 * the result instead of being silently dropped.
	 *
	 * @param inputString text to segment
	 * @return the tokens in input order
	 */
	public List<String> parserString(String inputString) {
		List<String> outputList = new LinkedList<>();
		int inputStringLength = inputString.length();
		int forestDepth = StablePOS.INT_ZERO;
		int countInputStringLength;
		StringBuilder[] fixWords = newFixWords();
		StringBuilder stringBuilder = new StringBuilder();
		int find = StablePOS.INT_ZERO;
		for (int charPosition = StablePOS.INT_ZERO; charPosition < inputStringLength;
				charPosition += (countInputStringLength != StablePOS.INT_ZERO
						? countInputStringLength : StablePOS.INT_ONE)) {
			char current = inputString.charAt(charPosition);
			if (StablePOS.INT_ONE_TWO_EIGHT > current) {
				StringBuilder run = fixWords[StablePOS.INT_ZERO];
				// The buffer is extended only while it already ends in a
				// low-code character; otherwise its old content is discarded.
				if (run.length() > StablePOS.INT_ZERO
						&& run.charAt(run.length() - StablePOS.INT_ONE)
								>= StablePOS.INT_ONE_TWO_EIGHT) {
					reset(run);
				}
				run.append(current);
				countInputStringLength = StablePOS.INT_ONE;
				find = StablePOS.INT_ONE;
				continue;
			}
			if (find == StablePOS.INT_ONE) {
				find = StablePOS.INT_ZERO;
				// NOTE(review): the buffer is not cleared after this flush.
				// Whether a later run can append to stale content depends on
				// how nlpController rewrites fixWords[0] - confirm.
				outputList.add(fixWords[StablePOS.INT_ZERO].toString());
			}
			reset(stringBuilder);
			stringBuilder = neroController.getBinaryForestRecurWordOneTime(
					stringBuilder.append(current), inputString, charPosition,
					inputStringLength, forestRoots, forestDepth,
					charPosition + StablePOS.INT_ONE);
			countInputStringLength = dispatchByWordLength(stringBuilder, inputString,
					charPosition, outputList, fixWords);
		}
		if (find == StablePOS.INT_ONE) {
			// The input ended inside a run that no later character flushed.
			outputList.add(fixWords[StablePOS.INT_ZERO].toString());
		}
		return outputList;
	}

	/**
	 * Refills {@code fixWords[StablePOS.INT_ONE]} with the text that follows
	 * the current position: from {@code charPosition + StablePOS.INT_THREE} up
	 * to {@code charPosition + StablePOS.INT_EIGHT}, cut off at the end of
	 * {@code inputString}.
	 *
	 * <p>A start index past the end of the text yields an empty buffer rather
	 * than a {@code StringIndexOutOfBoundsException}.
	 *
	 * @param charPosition index of the word currently being examined
	 * @param inputString  text being segmented
	 * @param fixWords     context buffers; only index {@code StablePOS.INT_ONE}
	 *                     is modified
	 */
	public void I_FixWords(int charPosition, String inputString
			, StringBuilder[] fixWords) {
		reset(fixWords[StablePOS.INT_ONE]);
		int textLength = inputString.length();
		int from = Math.min(charPosition + StablePOS.INT_THREE, textLength);
		int to = Math.min(charPosition + StablePOS.INT_EIGHT, textLength);
		fixWords[StablePOS.INT_ONE].append(inputString.substring(from, to));
	}

	/**
	 * Replaces every match of {@code StablePOS.NLP_SPASE_REP} with
	 * {@code StablePOS.SPACE_STRING} and splits the result on
	 * {@code StablePOS.SPACE_STRING}.
	 *
	 * @param englishString text to split
	 * @return the pieces, or a one-element array holding
	 *         {@code StablePOS.SPACE_STRING} when the split yields nothing
	 */
	public String[] parserEnglishString(String englishString) {
		String normalized = englishString.replaceAll(StablePOS.NLP_SPASE_REP
				, StablePOS.SPACE_STRING);
		String[] words = normalized.split(StablePOS.SPACE_STRING);
		if (StablePOS.INT_ZERO == words.length) {
			return new String[] {StablePOS.SPACE_STRING};
		}
		return words;
	}

	/** Allocates the two context buffers shared with the NLP controller. */
	private static StringBuilder[] newFixWords() {
		StringBuilder[] fixWords = new StringBuilder[StablePOS.INT_TWO];
		fixWords[StablePOS.INT_ZERO] = new StringBuilder();
		fixWords[StablePOS.INT_ONE] = new StringBuilder();
		return fixWords;
	}

	/** Empties a builder in place. */
	private static void reset(StringBuilder builder) {
		builder.delete(StablePOS.INT_ZERO, builder.length());
	}

	/**
	 * Splits a buffered run with {@code fHMMList.englishStringToWordsList} and
	 * appends the pieces to {@code outputList}, merging consecutive pieces
	 * found in {@code StablePOS.NUMBERS} into one token.
	 */
	private void addRunTokens(String run, List<String> outputList) {
		Iterator<String> it = fHMMList.englishStringToWordsList(run).iterator();
		StringBuilder number = new StringBuilder();
		while (it.hasNext()) {
			String temp = it.next();
			if (StablePOS.NUMBERS.contains(temp)) {
				number.append(temp);
				continue;
			}
			if (number.length() > 0) {
				outputList.add(number.toString());
				number.delete(0, number.length());
			}
			outputList.add(temp);
		}
		if (number.length() > 0) {
			outputList.add(number.toString());
		}
	}

	/**
	 * Handles the candidate word returned by the forest lookup according to
	 * its length and returns how many characters the caller should advance.
	 *
	 * <p>Length one is emitted directly and becomes the new content of
	 * {@code fixWords[StablePOS.INT_ZERO]}; lengths two, three and four are
	 * delegated to {@code nlpController}; any other length emits nothing and
	 * is returned unchanged.
	 */
	private int dispatchByWordLength(StringBuilder wordBuilder, String text,
			int charPosition, List<String> outputList, StringBuilder[] fixWords) {
		String word = wordBuilder.toString();
		int wordLength = word.length();
		if (wordLength == StablePOS.INT_ONE) {
			outputList.add(word);
			reset(fixWords[StablePOS.INT_ZERO]);
			fixWords[StablePOS.INT_ZERO].append(word);
			return wordLength;
		}
		if (wordLength == StablePOS.INT_TWO) {
			return nlpController.doSlangPartAndPOSCheckForTwoChar(wordLength
					, outputList, wordBuilder, wordsForest, fixWords, posController
					, charPosition, text);
		}
		if (wordLength == StablePOS.INT_THREE) {
			I_FixWords(charPosition, text, fixWords);
			return nlpController.doPOSAndEMMCheckOfThree(wordLength, outputList
					, wordsForest, wordBuilder, fixWords, posController, charPosition
					, text);
		}
		if (wordLength == StablePOS.INT_FOUR) {
			I_FixWords(charPosition, text, fixWords);
			return nlpController.doSlangCheck(wordLength, outputList, wordBuilder
					, wordsForest, fixWords, posController, charPosition, text);
		}
		return wordLength;
	}
}