Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.57 KB | None | 0 0
  1. from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
  2. from keras.models import Sequential
  3. from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
  4. from keras.layers import Activation, Dropout, Flatten, Dense
  5. from keras import backend as K
  6. from keras.callbacks import TensorBoard
  7. import os
  8. from shutil import copyfile
  9. import shutil
  10. import sys
  11. from keras.models import model_from_json
  12. from sklearn.svm import SVC
  13. from sklearn.preprocessing import MinMaxScaler
  14.  
  15. img_width, img_height = 150, 150
  16. train_data_dir = 'multi_train/'
  17. validation_data_dir = 'multi_validation/'
  18. nb_train_samples = 8404
  19. nb_validation_samples = 935
  20. epochs = 100
  21. batch_size = 16
  22.  
  23. def create_dataset(main_path, ratio):
  24. #main_path = 'malicious'
  25. train_path = os.getcwd() + '/multi_train/' + main_path
  26. val_path = os.getcwd() + '/multi_validation/' + main_path
  27.  
  28. # check path exists
  29. if (not os.path.exists(main_path)):
  30. print("dataset not found")
  31. sys.exit(0)
  32. else:
  33. # delete existing files
  34.  
  35. if(os.path.exists(train_path)):
  36. shutil.rmtree(train_path)
  37. os.makedirs(train_path)
  38.  
  39.  
  40. if(os.path.exists(val_path)):
  41. shutil.rmtree(val_path)
  42. os.makedirs(val_path)
  43.  
  44.  
  45. main_files = [os.path.join(main_path, f) for f in os.listdir(main_path)]
  46. # copy train files
  47. for t in main_files[0:ratio]:
  48. t_base = os.path.basename(t)
  49. copyfile(t, train_path + t_base)
  50. # copy test files
  51. for v in main_files[ratio:]:
  52. v_base = os.path.basename(v)
  53. copyfile(v, val_path + v_base)
  54. print("OK")
  55.  
  56. # SVM
  57. def svc(traindata, trainlabel, testdata, testlabel):
  58. print("Start training SVM...")
  59. svcClf = SVC(C=1.0, kernel="rbf", cache_size = 3000)
  60. svcClf.fit(traindata, trainlabel)
  61.  
  62. pred_testlabel = svcClf.predict(testdata)
  63. num = len(pred_testlabel)
  64. accuracy = len([1 for i in range(num) if testlabel[i] == pred_testlabel[i]]) / float(num)
  65. print("cnn-svm Accuracy:", accuracy)
  66.  
  67.  
  68. def create_model(input_shape):
  69. model = Sequential()
  70. model.add(Conv2D(32, (3, 3), input_shape=input_shape))
  71. model.add(Activation('relu'))
  72. model.add(MaxPooling2D(pool_size=(2, 2)))
  73.  
  74. model.add(Conv2D(32, (3, 3)))
  75. model.add(Activation('relu'))
  76. model.add(MaxPooling2D(pool_size=(2, 2)))
  77.  
  78. model.add(Conv2D(64, (3, 3)))
  79. model.add(Activation('relu'))
  80. model.add(MaxPooling2D(pool_size=(2, 2)))
  81.  
  82.  
  83.  
  84. model.add(Flatten())
  85.  
  86. # Additional Dense layer
  87. #model.add(Dense(1024))
  88. #model.add(Activation('relu'))
  89.  
  90. #model.add(Dense(64))
  91. model.add(Dense(512))
  92. #model.add(Dense(1024))
  93. #model.add(Dense(2048))
  94. model.add(Activation('relu'))
  95. model.add(Dropout(0.5))
  96. model.add(Dense(25))
  97. model.add(Activation('softmax'))
  98. return model
  99.  
  100. def VGG_16(input_shape):
  101. model = Sequential()
  102. model.add(ZeroPadding2D((1,1),input_shape=(input_shape)))
  103. model.add(Conv2D(64, 3, 3, activation='relu'))
  104. model.add(ZeroPadding2D((1,1)))
  105. model.add(Conv2D(64, 3, 3, activation='relu'))
  106. model.add(MaxPooling2D((2,2), strides=(2,2)))
  107.  
  108. model.add(ZeroPadding2D((1,1)))
  109. model.add(Conv2D(128, 3, 3, activation='relu'))
  110. model.add(ZeroPadding2D((1,1)))
  111. model.add(Conv2D(128, 3, 3, activation='relu'))
  112. model.add(MaxPooling2D((2,2), strides=(2,2)))
  113.  
  114. model.add(ZeroPadding2D((1,1)))
  115. model.add(Conv2D(256, 3, 3, activation='relu'))
  116. model.add(ZeroPadding2D((1,1)))
  117. model.add(Conv2D(256, 3, 3, activation='relu'))
  118. model.add(ZeroPadding2D((1,1)))
  119. model.add(Conv2D(256, 3, 3, activation='relu'))
  120. model.add(MaxPooling2D((2,2), strides=(2,2)))
  121.  
  122. model.add(ZeroPadding2D((1,1)))
  123. model.add(Conv2D(512, 3, 3, activation='relu'))
  124. model.add(ZeroPadding2D((1,1)))
  125. model.add(Conv2D(512, 3, 3, activation='relu'))
  126. model.add(ZeroPadding2D((1,1)))
  127. model.add(Conv2D(512, 3, 3, activation='relu'))
  128. model.add(MaxPooling2D((2,2), strides=(2,2)))
  129.  
  130. model.add(ZeroPadding2D((1,1)))
  131. model.add(Conv2D(512, 3, 3, activation='relu'))
  132. model.add(ZeroPadding2D((1,1)))
  133. model.add(Conv2D(512, 3, 3, activation='relu'))
  134. model.add(ZeroPadding2D((1,1)))
  135. model.add(Conv2D(512, 3, 3, activation='relu'))
  136. model.add(MaxPooling2D((2,2), strides=(2,2)))
  137.  
  138. model.add(Flatten())
  139. model.add(Dense(4096, activation='relu'))
  140. model.add(Dropout(0.5))
  141. model.add(Dense(4096, activation='relu'))
  142. model.add(Dropout(0.5))
  143. model.add(Dense(1, activation='sigmoid'))
  144.  
  145. return model
  146.  
  147. def save_model(model):
  148. # save model layout
  149. model_json = model.to_json()
  150. with open("model/model.json", "w") as json_file:
  151. json_file.write(model_json)
  152.  
  153. # save model weight
  154. model.save_weights("model/model.h5")
  155. print("Saved model to disk")
  156.  
  157. def load_model():
  158. # read model layout
  159. json_file = open("model/model.json", 'r')
  160. loaded_model_json = json_file.read()
  161. json_file.close()
  162. loaded_model = model_from_json(loaded_model_json)
  163. # load weghts into new model
  164. loaded_model.load_weights("model/model.h5")
  165. return loaded_model
  166.  
  167.  
  168.  
  169.  
  170. def main():
  171. AdialerC = "Adialer.C/"
  172. AgentFYI = "Agent.FYI/"
  173. AllapleA = "Allaple.A/"
  174. AllapleL = "Allaple.L/"
  175. AluerongenJ = "Alueron.gen!J/"
  176. AutorunK = "Autorun.K/"
  177. C2LOPgeng = "C2LOP.gen!g/"
  178. C2LOPP = "C2LOP.P/"
  179. DialplatformB = "Dialplatform.B/"
  180. DontovoA = "Dontovo.A/"
  181. Fakerean = "Fakerean/"
  182. Instantaccess = "Instantaccess/"
  183. LolydaAA1 = "Lolyda.AA1/"
  184. LolydaAA2 = "Lolyda.AA2/"
  185. LolydaAA3 = "Lolyda.AA3/"
  186. LolydaAT = "Lolyda.AT/"
  187. MalexgenJ = "Malex.gen!J/"
  188. ObfuscatorAD = "Obfuscator.AD/"
  189. Rbotgen = "Rbot!gen/"
  190. SkintrimN = "Skintrim.N/"
  191. SwizzorgenE = "Swizzor.gen!E/"
  192. SwizzorgenI = "Swizzor.gen!I/"
  193. VBAT = "VB.AT/"
  194. WintrimBX = "Wintrim.BX/"
  195. YunerA = "Yuner.A/"
  196. #malicious_path = 'malicious/'
  197. #benign_path = 'benign/'
  198.  
  199. print("Loading AdialerC samples")
  200. create_dataset(AdialerC, 110)
  201.  
  202. print("Loading Agent.FYI samples")
  203. create_dataset(AgentFYI, 104)
  204.  
  205. print("Loading AllapleA samples")
  206. create_dataset(AllapleA, 2654)
  207.  
  208. print("Loading AllapleL samples")
  209. create_dataset(AllapleL, 1432)
  210.  
  211. print("Loading AluerongenJ samples")
  212. create_dataset(AluerongenJ, 178)
  213.  
  214. print("Loading Autorun.K samples")
  215. create_dataset(AutorunK, 95)
  216.  
  217. print("Loading C2LOPgeng samples")
  218. create_dataset(C2LOPgeng, 180)
  219.  
  220. print("Loading C2LOPP samples")
  221. create_dataset(C2LOPP, 131)
  222.  
  223. print("Loading DialplatformB samples")
  224. create_dataset(DialplatformB, 159)
  225.  
  226. print("Loading DontovoA samples")
  227. create_dataset(DontovoA, 146)
  228.  
  229. print("Loading Fakerean samples")
  230. create_dataset(Fakerean, 343)
  231.  
  232. print("Loading Instantaccess samples")
  233. create_dataset(Instantaccess, 388)
  234.  
  235. print("Loading LolydaAA1 samples")
  236. create_dataset(LolydaAA1, 192)
  237.  
  238. print("Loading LolydaAA2 samples")
  239. create_dataset(LolydaAA2, 166)
  240.  
  241. print("Loading LolydaAA3 samples")
  242. create_dataset(LolydaAA3, 111)
  243.  
  244. print("Loading LolydaAT samples")
  245. create_dataset(LolydaAT, 143)
  246.  
  247. print("Loading MalexgenJ samples")
  248. create_dataset(MalexgenJ, 122)
  249.  
  250. print("Loading ObfuscatorAD samples")
  251. create_dataset(ObfuscatorAD, 128)
  252.  
  253. print("Loading Rbotgen samples")
  254. create_dataset(Rbotgen, 142)
  255.  
  256. print("Loading SkintrimN samples")
  257. create_dataset(SkintrimN, 72)
  258.  
  259. print("Loading SwizzorgenE samples")
  260. create_dataset(SwizzorgenE, 115)
  261.  
  262. print("Loading SwizzorgenI samples")
  263. create_dataset(SwizzorgenI, 119)
  264.  
  265. print("Loading VBAT samples")
  266. create_dataset(VBAT, 367)
  267.  
  268. print("Loading WintrimBX samples")
  269. create_dataset(WintrimBX, 87)
  270.  
  271. print("Loading YunerA samples")
  272. create_dataset(YunerA, 720)
  273.  
  274. if K.image_data_format() == 'channels_first':
  275. input_shape = (3, img_width, img_height)
  276. else:
  277. input_shape = (img_width, img_height, 3)
  278.  
  279. # create model
  280. model = create_model(input_shape)
  281. #model = VGG_16(input_shape)
  282. model.compile(loss='categorical_crossentropy',
  283. optimizer='adadelta',
  284. metrics=['accuracy'])
  285.  
  286. # this is the augmentation configuration we will use for training
  287. train_datagen = ImageDataGenerator(
  288. rescale=1. / 255,
  289. shear_range=0.2,
  290. zoom_range=0.2,
  291. horizontal_flip=True)
  292.  
  293. # this is the augmentation configuration we will use for testing:
  294. # only rescaling
  295. test_datagen = ImageDataGenerator(rescale=1. / 255)
  296.  
  297. train_generator = train_datagen.flow_from_directory(
  298. train_data_dir,
  299. target_size=(img_width, img_height),
  300. batch_size=batch_size,
  301. class_mode='categorical')
  302.  
  303. validation_generator = test_datagen.flow_from_directory(
  304. validation_data_dir,
  305. target_size=(img_width, img_height),
  306. batch_size=batch_size,
  307. class_mode='categorical')
  308.  
  309. tb = TensorBoard(log_dir='tensorboard/')
  310. model.fit_generator(
  311. train_generator,
  312. steps_per_epoch=nb_train_samples // batch_size,
  313. epochs=epochs,
  314. validation_data=validation_generator,
  315. validation_steps=nb_validation_samples // batch_size, callbacks = [tb])
  316.  
  317. # model.save_weights('first_try.h5')
  318. # save model
  319. save_model(model)
  320.  
  321. # load model
  322. loaded_model = load_model()
  323.  
  324. # summary of model
  325. loaded_model.summary()
  326.  
  327. #get intermediate output layer
  328. # get_3rd_layer_output = K.function([loaded_model.layers[0].input, K.learning_phase()],
  329. # [loaded_model.layers[2].output])
  330. # layer_output = get_3rd_layer_output([train_generator, 0])[0]
  331. # print(layer_output.shape)
  332.  
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement