diff --git a/webshell.py b/webshell.py
index 2a78b79..c25c774 100644
--- a/webshell.py
+++ b/webshell.py
@@ -25,6 +25,7 @@ config.gpu_options.allow_growth = True
 sess0 = tf.InteractiveSession(config=config)
 '''
 g_word_dict = {}
+g_keras_token = None
 
 
 def os_listdir_ex(file_dir, find_name):  # legacy code
@@ -114,7 +115,41 @@ def vectorize_sequences(sequences, dimention=1337):
     return results
 
 
-def get_file_word_bag(pFile):
+def get_word_bag(pWordList):
+    global g_word_dict
+    global g_keras_token
+    sequences_data = g_keras_token.texts_to_sequences(pWordList)
+    word_bag = []
+    for index in range(0, len(sequences_data)):
+        if len(sequences_data[index]) != 0:
+            for zeus in range(0, len(sequences_data[index])):
+                word_bag.append(sequences_data[index][zeus])
+    return word_bag
+
+
+def set_word_bag(pWordList):
+    global g_word_dict
+    global g_keras_token
+    if g_keras_token is None:
+        g_keras_token = keras.preprocessing.text.Tokenizer()  # initialize the tokenizer
+    g_keras_token.fit_on_texts(pWordList)  # learn the word index from the texts
+    g_word_dict.update(g_keras_token.word_index)
+
+
+def get_file_word(pFile):
+    global g_word_dict
+    english_punctuations = [',', '.', ':', ';', '?',
+                            '(', ')', '[', ']', '&', '!', '*', '@', '#', '$', '%', 'php', '<', '>', '\'']
+    clean_string = flush_file(pFile)
+    word_list = nltk.word_tokenize(clean_string)
+    # filter out unwanted punctuation tokens
+    word_list = [
+        word_iter for word_iter in word_list if word_iter not in english_punctuations]
+    # anti-paste
+    return word_list
+
+
+def get_file_word_bag_non_use(pFile):
     global g_word_dict
     english_punctuations = [',', '.', ':', ';', '?',
                             '(', ')', '[', ']', '&', '!', '*', '@', '#', '$', '%', 'php', '<', '>', '\'']
@@ -264,7 +299,11 @@ if os.path.exists("save.csv") == False:
         data_frame['entropy'].values.reshape(-1, 1), scaler_entropy.fit(data_frame['entropy'].values.reshape(-1, 1)))
     # build the word bag
     data_frame['word_bag'] = data_frame['file'].map(
-        lambda file_name: get_file_word_bag(file_name))
+        lambda file_name: get_file_word(file_name))
+    set_word_bag(data_frame['word_bag'])
+    data_frame['word_bag'] = data_frame['word_bag'].map(
+        lambda text_params: get_word_bag(text_params))
+
     data_frame.to_csv("save.csv")
     dump(scaler_length, 'scaler_length.joblib')
     dump(scaler_entropy, 'scaler_entropy.joblib')
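
Reviewer note, not part of the patch: the change splits the old one-shot get_file_word_bag into a two-pass flow. get_file_word tokenizes one file with nltk and strips punctuation, set_word_bag fits a single shared keras Tokenizer across all files' token lists, and get_word_bag then maps one file's tokens to a flat list of word indices. A minimal sketch of driving the three helpers, assuming webshell.py's top-level training script is guarded so the functions can be imported, and with 'sample.php' as a hypothetical input file:

    # Sketch only; assumes the helpers above are importable and that
    # 'sample.php' is a stand-in for a real file on disk.
    from webshell import get_file_word, set_word_bag, get_word_bag

    words = get_file_word('sample.php')   # tokenize one file via nltk
    set_word_bag([words])                 # pass 1: fit the shared Tokenizer
    indices = get_word_bag(words)         # pass 2: flat list of word indices
    print(indices)                        # e.g. [3, 1, 17, ...]

Because g_keras_token stays None until set_word_bag has run, get_word_bag is only safe to call after the Tokenizer has seen every file, which is why the data_frame['word_bag'] column is built in two map passes in the hunk at line 299.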