Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 35.91 KB | None | 0 0
  1. library(RODBC)
  2. library(data.table)
  3.  
  4. virt155 <- odbcConnect("virt155", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
  5. virt153 <- odbcConnect("Virt153", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
  6. DBSeldonAnalitic17 <- odbcConnect("DBSeldonAnalitic17", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
  7. virt94 <- odbcConnect("Virt94", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
  8. virt304 <- odbcConnect("virt304", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
  9.  
  10.  
  11.  
  12. calcul.dataset <- function(flagCalculForAll = 1, ogrn = 0) {
  13.  
  14. if(flagCalculForAll == 0) {
  15. sql_ogrn = paste("'",paste(ogrn, collapse = "','"),"'",sep="")
  16. templace.end <- paste("
  17. left join Egrul.dbo.RefFirmNameTable as nameT with(nolock) on ul.prs_N_P = nameT.Obj
  18. left join [Egrul].[dbo].[OPF] as opf on ul.NAME_OPF_ID=opf.id
  19. left join [RegistryDicts].[dbo].[RefOPFDICS] as opfdict on opf.opfdics=opfdict.id
  20. join [RegistryDicts].[dbo].[RefEgrulStatus2FirmStatusTable] as classif on classif.RefEgrulStatusID = ul.STATUS_ID
  21. join [RegistryDicts].[dbo].[RefFirmStatusTable] as firmStatus on firmStatus.ID = classif.RefFirmStatusID
  22. where nameT.Lang = 1 and ul.ogrn in (
  23. ", sql_ogrn, ")", sep = "")
  24. }
  25.  
  26. if(flagCalculForAll == 1) {
  27. templace.end <- "
  28. left join Egrul.dbo.RefFirmNameTable as nameT with(nolock) on ul.prs_N_P = nameT.Obj
  29. left join [Egrul].[dbo].[OPF] as opf on ul.NAME_OPF_ID=opf.id
  30. left join [RegistryDicts].[dbo].[RefOPFDICS] as opfdict on opf.opfdics=opfdict.id
  31. join [RegistryDicts].[dbo].[RefEgrulStatus2FirmStatusTable] as classif on classif.RefEgrulStatusID = ul.STATUS_ID
  32. join [RegistryDicts].[dbo].[RefFirmStatusTable] as firmStatus on firmStatus.ID = classif.RefFirmStatusID
  33. where firmStatus.ID in (1,2,6,7,8,11) and nameT.Lang = 1 and (opfdict.NAME in (' ',' ',' ',
  34. ' ',' ', ' ()') or nameT.Name like ('% %') or nameT.Name like
  35. ('% %'))
  36. "
  37. }
  38.  
  39. egrul_main <- "
  40. select
  41. ul.OGRN ogrn
  42. ,ul.INN inn
  43. ,ul.CAPITAL_SUMMA equity
  44. ,ul.start_dtreg start_reg
  45. ,ul.finish_dtreg finish_reg
  46. ,opfdict.NAME opf
  47. ,case when nameT.Name like ('% %') then ' '
  48. when (nameT.Name like ('% %') or nameT.Name like ('% %'))
  49. then ' '
  50. when (nameT.Name like ('% %') or nameT.Name like ('% %'))
  51. then ' '
  52. else '' end opf_from_name
  53. ,firmStatus.Name [status]
  54. from Egrul.dbo.UL_First as ul
  55. "
  56. data <- data.table(sqlQuery(virt155, paste(egrul_main, templace.end), as.is = c(1,2,4,5,6,7,8)))
  57.  
  58. ##### -
  59. doch.start <- "
  60. select
  61. ul.ogrn ogrn
  62. ,COUNT(distinct linkT.id) subsid
  63. from Egrul.dbo.UL_First as ul
  64. left join Egrul.dbo.Basis_LinksTable as linkT on linkT.Donor = ul.ogrn
  65. "
  66. dochT <- data.table(sqlQuery(virt155, paste(doch.start, templace.end, " and LinkType=1 group by ul.ogrn"), as.is = 1))
  67.  
  68. ##### -
  69. filial.start <- "
  70. select
  71. ul.ogrn ogrn
  72. ,COUNT(distinct linkT.id) branch
  73. from Egrul.dbo.UL_First as ul
  74. left join Egrul.dbo.Basis_LinksTable as linkT on linkT.Donor = ul.ogrn
  75. "
  76. filialT <- data.table(sqlQuery(virt155, paste(filial.start, templace.end, "and LinkType=10 group by ul.ogrn"), as.is = 1))
  77.  
  78. ##### -
  79. agency.start <- "
  80. select
  81. ul.ogrn ogrn
  82. ,COUNT(distinct linkT.id) agency
  83. from Egrul.dbo.UL_First as ul
  84. left join Egrul.dbo.Basis_LinksTable as linkT on linkT.Donor = ul.ogrn
  85. "
  86. agencyT <- data.table(sqlQuery(virt155, paste(agency.start, templace.end, "and LinkType=11 group by ul.ogrn"), as.is = 1))
  87.  
  88. ### ==
  89. uch_head.start <- "
  90. select distinct
  91. ul.ogrn ogrn
  92. ,1 founder_head
  93. from [Egrul].dbo.ul_first ul with(nolock)
  94. left join [Egrul].dbo.[UCHR_UCHRFL] uc with(nolock) on ul.idul=uc.idul
  95. left join [Egrul].dbo.[DOLGNFL] d with(nolock) on ul.IDUL=d.IDUL
  96. "
  97. uch_headT <- data.table(sqlQuery(virt155, paste(uch_head.start, templace.end, "and uc.FLID1=d.FLID1"), as.is = 1))
  98.  
  99. #####
  100. okved.start <- "
  101. select
  102. ul.ogrn ogrn
  103. ,count(okvedT.KOD_OKVED) count_okved
  104. from [Egrul].dbo.UL_First as ul with(nolock)
  105. left join [Egrul].dbo.UL_OKVED_First as okvedT with(nolock) on okvedT.IDUL = ul.IDUL
  106. "
  107. okvedT <- data.table(sqlQuery(virt155, paste(okved.start, templace.end, "group by ul.ogrn"), as.is = 1))
  108.  
  109. #####
  110. contractsT44 <- data.table(sqlQuery( DBSeldonAnalitic17,
  111. "
  112. select distinct
  113. partT.inn inn
  114. ,1 is_contract44
  115. from OOS.dbo.ContractTable as ct with(nolock)
  116. inner join OOS.dbo.applicationParticipantLinkTable as apPartT with(nolock) on apPartT.applicationParticipantGUIDLink=ct.GUID
  117. inner join OOS.dbo.ParticipantTable as PartT with(nolock) on PartT.GUID=apPartT.participantLink
  118. ", as.is= 1))
  119.  
  120. contractsT223 <- data.table(sqlQuery( DBSeldonAnalitic17,
  121. "
  122. select distinct
  123. partT.inn inn
  124. ,1 is_contract223
  125. from [223FZOOS].[dbo].[ContractParticipant] as ct
  126. join [223FZOOS].dbo.participantTable as partT ON ct.ParticipantTableGUID=partT.GUID
  127. ", as.is= 1))
  128.  
  129. #####
  130. participT44 <- data.table(sqlQuery(DBSeldonAnalitic17,
  131. "
  132. select distinct
  133. inn
  134. ,1 is_particip44
  135. from OOS.dbo.ParticipantTable as partT with(nolock)
  136. ", as.is = 1))
  137.  
  138. participT223 <- data.table(sqlQuery(DBSeldonAnalitic17,
  139. "
  140. select distinct
  141. inn
  142. ,1 is_particip223
  143. from [223FZOOS].[dbo].[participantTable]
  144. ", as.is = 1))
  145.  
  146. #### 2014
  147. balances2014 <- data.table(sqlQuery(virt155, "
  148. select distinct
  149. compT.ogrn ogrn
  150. ,1 is_fin_report2014
  151. from Balance.dbo.B2015 balT
  152. left join Balance.dbo.Company as compT on compT.CompanyID = balT.CompanyID
  153. ", as.is = 1))
  154.  
  155. #### 2015
  156. balances2015 <- data.table(sqlQuery(virt155, "
  157. select distinct
  158. compT.ogrn ogrn
  159. ,1 is_fin_report2015
  160. from Balance.dbo.B2016 balT
  161. left join Balance.dbo.Company as compT on compT.CompanyID = balT.CompanyID
  162. ", as.is = 1))
  163.  
  164. #####
  165. massAdress.start <- "
  166. select
  167. ul.ogrn ogrn
  168. ,massT.[count] mass_adress
  169. from Egrul.dbo.UL_First as ul
  170. left join Egrul.dbo.AdressTable as adressT on adressT.ID = ul.AdressID
  171. left join Egrul.stat.MassAdressTable as massT on massT.OSMID = adressT.OSMId
  172. "
  173. massAdressT <- data.table(sqlQuery(virt155, paste(massAdress.start, templace.end), as.is = 1))
  174.  
  175. ### . .
  176. uchrRFL.sql <- "
  177. select
  178. ul.OGRN ogrn
  179. ,count(distinct fl.ID) rfl
  180. from [Egrul].dbo.UL_First as ul
  181. join [Egrul].dbo.[uchr_uchrfl] fl with(nolock) on ul.idul=fl.idul
  182. "
  183. uchrRFL <- data.table(sqlQuery(virt155, paste(uchrRFL.sql, templace.end, "group by ul.ogrn"), as.is = 1))
  184.  
  185.  
  186. ### .
  187. uchrRUL.sql <- "
  188. select
  189. ul.OGRN ogrn
  190. ,count(distinct rul.ID) rul
  191. from [Egrul].dbo.UL_First as ul
  192. join [Egrul].dbo.UCHR_RUL rul with(nolock) on ul.idul=rul.idul
  193. "
  194. uchrRUL <- data.table(sqlQuery(virt155, paste(uchrRUL.sql, templace.end, "group by ul.ogrn"), as.is = 1))
  195.  
  196.  
  197. ### .
  198. uchrIUL.sql <- "
  199. select
  200. ul.OGRN ogrn
  201. ,count(distinct iul.ID) iul
  202. from [Egrul].dbo.UL_First as ul
  203. join [Egrul].dbo.UCHR_IUL iul with(nolock) on ul.idul=iul.idul
  204. "
  205. uchrIUL <- data.table(sqlQuery(virt155, paste(uchrIUL.sql, templace.end, "group by ul.ogrn"), as.is = 1))
  206.  
  207. ###
  208. uchrGos.sql <- "
  209. select
  210. ul.ogrn ogrn
  211. ,COUNT(distinct gos_uchr.id) gos_uchr
  212. from Egrul.dbo.UL_First as ul
  213. join [Egrul].dbo.UCHR_RFSUBMO gos_uchr with(nolock) on ul.idul=gos_uchr.idul
  214. "
  215. uchrGos <- data.table(sqlQuery(virt155, paste(uchrGos.sql, templace.end, "group by ul.ogrn"), as.is = 1))
  216.  
  217. #####
  218. domT <- data.table(sqlQuery(virt155,"
  219. select distinct
  220. domainT.OGRN ogrn
  221. ,1 domain
  222. from [Egrul].[dbo].[SiteProcessing] as domainT
  223. ", as.is = 1))
  224.  
  225. ####
  226. # ecpT <- data.table(unique(read.table("D:/kosolapov.s/OneDayFirm/data/ecp.csv", col.names = c("inn"), quote="\"",
  227. # colClasses = "character", skip = 1)))
  228. # ecpT$is_ecp <- 1
  229.  
  230. ecpT <- data.table(sqlQuery(virt94, "
  231. select distinct
  232. erpt.INN inn
  233. ,1 is_ecp
  234. from crm_3_work.dbo.custom_ecpRequestPublishedTable as erpt
  235. where erpt.State = 2 and INN is not null and INN not in ('','0000000000')
  236. ", as.is = 1))
  237.  
  238. ###
  239. stats_egrul_sql <- "
  240. select
  241. ul.OGRN ogrn
  242. ,[Balance] countBalances
  243. ,License license
  244. ,WithoutTaxRecoding
  245. ,sign([BadProducers]) is_in_rnp
  246. ,[CntTradeMarks]
  247. ,[guaranContrCnt]
  248. ,[export]
  249. ,[isPVI]
  250. ,[isREK]
  251. ,Import is_importer
  252. from Egrul.dbo.UL_First as ul
  253. join [RegistryDicts].[dbo].[FirmStatTable] as statT on statT.OGRN = ul.ogrn
  254. "
  255. stats_egrul <- data.table(sqlQuery(virt155, paste(stats_egrul_sql, templace.end), as.is = 1))
  256.  
  257. ###
  258. main_okved.sql <- "
  259. SELECT
  260. ul.[OGRN] ogrn
  261. ,isnull(okved2.code,okvClassif.Okved2Code) okved2_main
  262. FROM [Egrul].[dbo].[UL_first] as ul
  263. inner join [Egrul].[dbo].UL_OKVED_First as ok1 on ul.IDUL=ok1.IDUL and ok1.MAIN = 1
  264. left join [RegistryDicts].[dbo].[RefOkved2] as okved2 on ok1.KOD_OKVED=okved2.code
  265. left join [RegistryDicts].[dbo].[OkvedOkved2] as okvClassif on okvClassif.OkvedCode = ok1.KOD_OKVED
  266. "
  267. main_okvedT <- data.table(sqlQuery(virt155, paste(main_okved.sql, templace.end), as.is = 1))
  268.  
  269. ###
  270. msp.sql <- "
  271. SELECT
  272. ul.[Ogrn] ogrn
  273. ,[CategoryID] mspType
  274. from Egrul.dbo.ul_first as ul
  275. join [Seldon2Pro].[dbo].[Msp] as mspT on mspT.ogrn = ul.ogrn
  276. "
  277. mspT <- data.table(sqlQuery(virt155, paste(msp.sql, templace.end), as.is = 1))
  278.  
  279.  
  280. ####
  281. rnp_currentT <- data.table(sqlQuery(DBSeldonAnalitic17, "
  282. select distinct
  283. badT.INN inn
  284. ,1 is_in_rnp_current
  285. from [Services].dbo.FasProducerBadTable as badT
  286. where GETDATE() < isnull([ExcludeDate], DATEADD(year,2,ArchiveDate))
  287. ", as.is = 1))
  288.  
  289. ###
  290. DisqualifiedPersonT <- data.table(sqlQuery(virt155, "
  291. select distinct
  292. ul.ogrn
  293. ,1 is_disqualifiedPerson
  294. from DisqualifiedPerson.dbo.RegNoteTable as rnt
  295. join [DisqualifiedPerson].[dbo].[PersonTable] as pt on rnt.person=pt.GUID
  296. join Egrul.dbo.RefFL1 as fl1 on fl1.INN = pt.inn
  297. join [Egrul].dbo.[DOLGNFL] d on d.FLid1 = fl1.ID
  298. join Egrul.dbo.UL_First as ul on ul.IDUL=d.IDUL
  299. where GETDATE() between rnt.startDate and rnt.endDate
  300. ", as.is = 1))
  301.  
  302. #####
  303. certificate.start <- "
  304. select distinct
  305. ul.ogrn ogrn,
  306. count (re.ser_sv) [certificate]
  307. from [Egrul].dbo.[UL_first] ul with(nolock)
  308. left join [Egrul].dbo.[REGEGRUL] re with(nolock) on ul.idul=re.idul
  309. "
  310. certificateT <- data.table(sqlQuery(virt155, paste(certificate.start, templace.end, "group by ul.ogrn" ), as.is = 1))
  311.  
  312. #####
  313. arbT <- data.table(sqlQuery(virt155,
  314. "
  315. select distinct
  316. Ogrn ogrn
  317. ,[Type] typeArb
  318. from [ARB].[dbo].[Statistics]
  319. ", as.is = c(1,2)))
  320. arbT[, responder_binary := as.numeric(typeArb == 'D')]
  321. arbT[, claimant_binary := as.numeric(typeArb == 'P')]
  322. arbT[, third_person_binary := as.numeric(typeArb %in% c('O','T'))]
  323. arbT[, 'typeArb':= NULL]
  324.  
  325. ###
  326. balances_sql <- "
  327. select
  328. statT.OGRN ogrn
  329. ,statT.[Year] [year]
  330. ,profit profit
  331. ,revenue revenue
  332. ,net_profit net_profit
  333. ,isnull(ul.CAPITAL_SUMMA, 0) equity
  334. FROM [Balance].[dbo].[CompanyStatTable] as statT
  335. join Egrul.dbo.UL_First as ul on ul.ogrn = statT.OGRN
  336. where profit <= 20000 and isnull(ul.CAPITAL_SUMMA, 0) <= 20000 and revenue < 1 and net_profit < 1
  337. "
  338. balancesT <- data.table(sqlQuery(virt155, balances_sql, as.is = 1), key = "ogrn")
  339. null_balances2015 <- data.table(ogrn = balancesT$ogrn[balancesT$year == 2016], is_null_fin_report2015 = 1, key = "ogrn")
  340. null_balances2014 <- data.table(ogrn = balancesT$ogrn[balancesT$year == 2015], is_null_fin_report2014 = 1, key = "ogrn")
  341. countNullBalances <- balancesT[, .N, ogrn]
  342. setnames(countNullBalances, "N", "count_null_balances")
  343.  
  344. ###
  345. auditT_sql <- "
  346. select distinct
  347. orgT.Ogrn ogrn
  348. ,1 is_audit
  349. from [AuditUnPlan].[dbo].[NewAudit] as auditT
  350. left join [AuditUnPlan].[dbo].[Organization] as orgT on orgT.OrganizationID = auditT.[CheckOrgID]
  351. left join [AuditUnPlan].[dbo].[Result] as resultT on resultT.[AuditGUID] = auditT.GUID
  352. left join [AuditUnPlan].[dbo].[Violations] as vT with(nolock) on resultT.guid=vT.ResultGUID
  353. where resultT.ResultID is not null and vT.ViolationsID is null
  354. "
  355. auditT <- data.table(sqlQuery(virt153, auditT_sql, as.is = 1))
  356.  
  357. ######################################################################################################
  358. ###
  359. data[opf == ' ', opf:= ' ']
  360. data[opf == ' ', opf:= ' ']
  361. data[!opf %in% c(' ',' ',' '), opf := opf_from_name]
  362. data[,start_reg := as.Date(start_reg, "%Y-%m-%d")]
  363. data[,finish_reg := as.Date(finish_reg, "%Y-%m-%d")]
  364. data[is.na(finish_reg), finish_reg := Sys.Date()]
  365. data[, age := as.numeric(round((finish_reg - start_reg)/30))]
  366.  
  367. #
  368. uchrT <- merge(uchrRFL, uchrRUL, by = "ogrn", all.x = T, all.y = T)
  369. uchrT <- merge(uchrT, uchrIUL, by = "ogrn", all.x = T, all.y = T)
  370. uchrT <- merge(uchrT, uchrGos, by = "ogrn", all.x = T, all.y = T)
  371. uchrT[is.na(rfl), rfl := 0]
  372. uchrT[is.na(rul), rul := 0]
  373. uchrT[is.na(iul), iul := 0]
  374. uchrT[is.na(gos_uchr), gos_uchr := 0]
  375. uchrT[, founderAll := rfl + rul + iul + gos_uchr]
  376. uchrT <- uchrT[,.(ogrn,founderAll)]
  377.  
  378. #
  379. main_okvedT[, okved2_main := substring(okved2_main, 1, 2)]
  380. main_okvedT[, is_main_okved46 := as.numeric(okved2_main == 46)]
  381. main_okvedT <- main_okvedT[, .(is_main_okved46 = max(is_main_okved46)), ogrn]
  382.  
  383. stats_egrul[, is_exporter := export + isPVI + isREK]
  384. mspT[, is_msp := sign(mspType)]
  385. arbT <- arbT[, lapply(.SD, max), by = "ogrn" ]
  386.  
  387. data <- merge(data, dochT, by = "ogrn", all.x = T, all.y = F)
  388. data <- merge(data, filialT, by = "ogrn", all.x = T, all.y = F)
  389. data <- merge(data, agencyT, by = "ogrn", all.x = T, all.y = F)
  390. data <- merge(data, uch_headT, by = "ogrn", all.x = T, all.y = F)
  391. data <- merge(data, contractsT44, by = "inn", all.x = T, all.y = F)
  392. data <- merge(data, participT44, by = "inn", all.x = T, all.y = F)
  393. data <- merge(data, contractsT223, by = "inn", all.x = T, all.y = F)
  394. data <- merge(data, participT223, by = "inn", all.x = T, all.y = F)
  395. data <- merge(data, balances2015, by = "ogrn", all.x = T, all.y = F)
  396. data <- merge(data, balances2014, by = "ogrn", all.x = T, all.y = F)
  397. data <- merge(data, ecpT, by = "inn", all.x = T, all.y = F)
  398. data <- merge(data, massAdressT, by = "ogrn", all.x = T, all.y = F)
  399. data <- merge(data, uchrT, by = "ogrn", all.x = T, all.y = F)
  400. data <- merge(data, mspT, by = "ogrn", all.x = T, all.y = F)
  401. data <- merge(data, domT, by = "ogrn", all.x = T, all.y = F)
  402. data <- merge(data, main_okvedT, by = "ogrn", all.x = T, all.y = F)
  403. data <- merge(data, stats_egrul, by = "ogrn", all.x = T, all.y = F)
  404. data <- merge(data, okvedT, by = "ogrn", all.x = T, all.y = F)
  405. data <- merge(data, rnp_currentT, by = "inn", all.x = T, all.y = F)
  406. data <- merge(data, DisqualifiedPersonT, by = "ogrn", all.x = T, all.y = F)
  407. data <- merge(data, arbT, by = "ogrn", all.x = T, all.y = F)
  408. data <- merge(data, certificateT, by = "ogrn", all.x = T, all.y = F)
  409. data <- merge(data, null_balances2015, by = "ogrn", all.x = T, all.y = F)
  410. data <- merge(data, null_balances2014, by = "ogrn", all.x = T, all.y = F)
  411. data <- merge(data, countNullBalances, by = "ogrn", all.x = T, all.y = F)
  412. data <- merge(data, auditT, by = "ogrn", all.x = T, all.y = F)
  413.  
  414.  
  415. data[is.na(equity), equity := 0]
  416. data[is.na(subsid), subsid := 0]
  417. data[is.na(branch), branch := 0]
  418. data[is.na(agency), agency := 0]
  419. data[is.na(founder_head), founder_head := 0]
  420. data[is.na(is_contract44), is_contract44 := 0]
  421. data[is.na(is_particip44), is_particip44 := 0]
  422. data[is.na(is_contract223), is_contract223 := 0]
  423. data[is.na(is_particip223), is_particip223 := 0]
  424. data[is.na(is_fin_report2015), is_fin_report2015 := 0]
  425. data[is.na(is_fin_report2014), is_fin_report2014 := 0]
  426. data[is.na(is_null_fin_report2015), is_null_fin_report2015 := 0]
  427. data[is.na(is_null_fin_report2014), is_null_fin_report2014 := 0]
  428. data[is.na(count_null_balances), count_null_balances := 0]
  429. data[is.na(is_ecp), is_ecp := 0]
  430. data[is.na(mass_adress), mass_adress := 1]
  431. data[is.na(founderAll), founderAll := 0]
  432. data[is.na(is_msp), is_msp := 0]
  433. data[is.na(count_okved), count_okved := 0]
  434. data[is.na(is_in_rnp_current), is_in_rnp_current := 0]
  435. data[is.na(is_disqualifiedPerson), is_disqualifiedPerson := 0]
  436. data[is.na(domain), domain := 0]
  437. data[is.na(responder_binary), responder_binary := 0]
  438. data[is.na(claimant_binary), claimant_binary := 0]
  439. data[is.na(third_person_binary), third_person_binary := 0]
  440. data[is.na(is_main_okved46), is_main_okved46 := 0]
  441. data[is.na(is_audit), is_audit := 0]
  442.  
  443. data[, is_particip := sign(is_particip44 + is_particip223)]
  444. data[, is_contract := sign(is_contract44 + is_contract223)]
  445. data[, is_particip_oos := sign(is_particip + is_contract)]
  446. data[,':='(age_new = 1)][age > 70 & age <= 90, age_new := 2][age > 90 & age <= 110, age_new := 3][age > 110 & age <= 140, age_new := 4][age > 140 & age <= 180, age_new := 5][age > 180, age_new := 6]
  447.  
  448. data[, c("start_reg","finish_reg","opf_from_name","is_contract44","is_particip44","is_contract223","is_particip223","mspType","export","isPVI","isREK") := NULL]
  449. return(data)
  450. }
  451.  
  452. MassHeadFunction <- function() {
  453.  
  454. ###
  455. headTableAll <- data.table(sqlQuery(virt155, "
  456. select distinct
  457. firstname firstName
  458. ,middlename middleName
  459. ,lastname lastName
  460. ,fl1.INN inn_head
  461. ,ul.OGRN ogrn
  462. from Egrul.dbo.UL_First as ul
  463. left join [Egrul].dbo.[DOLGNFL] d with(nolock) on ul.IDUL=d.IDUL
  464. left join Egrul.dbo.RefFL1 as fl1 on d.FLid1 = fl1.ID
  465. left join Egrul.dbo.RefFIO as fio on fio.ID = fl1.FIOid
  466. ", as.is = c(1,2,3,4,5)))
  467. headTableAll[, fio := paste(firstName, middleName, lastName, sep = " ")]
  468. headTableAll[, region_comp := substring(ogrn, 4, 5)]
  469.  
  470. ### . -
  471. headTable <- headTableAll[, .(count_comp = length(ogrn)), by = .(fio, inn_head, region_comp)]
  472.  
  473. ### . ,
  474. headTable[, ':='(is_null_inn = 0)][is.na(inn_head), is_null_inn := 1]
  475. needTransformT <- headTable[, .(needTransform = max(is_null_inn)), fio]
  476. headTable <- merge(headTable, needTransformT, by = "fio")
  477.  
  478. headTransformTable <- headTable[needTransform == 1, ]
  479. headNotTransformTable <- headTable[needTransform == 0, ]
  480.  
  481. # . ,
  482. headTransform_NA_inn <- headTransformTable[is.na(inn_head), ]
  483. headTransform_notNA_inn <- headTransformTable[!is.na(inn_head), ]
  484.  
  485. # .
  486. countINN_byFioRegion <- headTransform_notNA_inn[, .(countINN_byFioRegion = length(inn_head)), by = .(fio, region_comp)]
  487. countINN_byFioRegion <- merge(countINN_byFioRegion, headTransform_NA_inn[,.(fio, region_comp, count_comp)],
  488. by = c("fio", "region_comp"), all.x = T, all.y = F)
  489. countINN_byFioRegion[is.na(count_comp), count_comp := 0]
  490. countINN_byFioRegion[, rate_comp := count_comp/(countINN_byFioRegion + 2)]
  491. countINN_byFioRegion[region_comp == '77', rate_comp := count_comp/(countINN_byFioRegion + 5)]
  492.  
  493. # .
  494. headTransform_notNA_inn <- merge(headTransform_notNA_inn, countINN_byFioRegion[, .(fio, region_comp, rate_comp)],
  495. by = c("fio", "region_comp"), all.x = T, all.y = F)
  496. headTransform_notNA_inn[, count_comp_all := round(count_comp + rate_comp)]
  497.  
  498. # . - .
  499. medianCountComp_byFioRegion <- headTransform_notNA_inn[, .(medianCountComp = round(median(count_comp_all))), by = .(fio, region_comp)]
  500. headTransform_NA_inn <- merge(headTransform_NA_inn, medianCountComp_byFioRegion, by = c("fio", "region_comp"), all.x = T, all.y = F)
  501. headTransform_NA_inn[, count_comp := as.numeric(count_comp)] #
  502. headTransform_NA_inn[is.na(medianCountComp), medianCountComp := count_comp]
  503. setnames(headTransform_NA_inn, old = "medianCountComp", new = "count_comp_all")
  504.  
  505. # . -
  506. setnames(headNotTransformTable, old = "count_comp", new = "count_comp_all")
  507.  
  508. allCompanies <- rbindlist(list(headNotTransformTable[, .(fio, inn_head, region_comp, count_comp_all)]
  509. ,headTransform_NA_inn[, .(fio, inn_head, region_comp, count_comp_all)]
  510. ,headTransform_notNA_inn[, .(fio, inn_head, region_comp, count_comp_all)])
  511. ,use.names = T, fill = F)
  512. allCompanies[is.na(inn_head), inn_head := '-1']
  513. allCompanies <- allCompanies[, .(countComp_byFioINN = sum(count_comp_all)), by = .(fio, inn_head)]
  514.  
  515. #
  516. headTableAll[is.na(inn_head), inn_head := '-1']
  517. headTableAll <- merge(headTableAll, allCompanies, by = c("fio", "inn_head"), all.x = T, all.y = F)
  518. headTableAll[is.na(countComp_byFioINN), countComp_byFioINN := 0]
  519. massHeadT <- headTableAll[, .(mass_head = max(countComp_byFioINN)), ogrn]
  520.  
  521. return(massHeadT)
  522. }
  523.  
  524. MassFounderFunction <- function() {
  525. ### ,
  526.  
  527. ###
  528. headTableAll <- data.table(sqlQuery(virt155, "
  529. select
  530. firstname firstName
  531. ,middlename middleName
  532. ,lastname lastName
  533. ,flT.INN inn_head
  534. ,ul.OGRN ogrn
  535. from [Egrul].dbo.UL_First as ul
  536. left join [Egrul].dbo.uchr_uchrfl uc on ul.idul=uc.idul
  537. left join [Egrul].dbo.RefFL1 flT with(nolock) on flT.id=uc.FLID1
  538. left join Egrul.dbo.RefFIO as fio on fio.ID = flT.FIOid
  539. ", as.is = c(1,2,3,4,5)))
  540. headTableAll[, fio := paste(firstName, middleName, lastName, sep = " ")]
  541. headTableAll[, region_comp := substring(ogrn, 4, 5)]
  542.  
  543. ### . -
  544. headTable <- headTableAll[, .(count_comp = length(ogrn)), by = .(fio, inn_head, region_comp)]
  545.  
  546. ### . ,
  547. headTable[, ':='(is_null_inn = 0)][is.na(inn_head), is_null_inn := 1]
  548. needTransformT <- headTable[, .(needTransform = max(is_null_inn)), fio]
  549. headTable <- merge(headTable, needTransformT, by = "fio")
  550.  
  551. headTransformTable <- headTable[needTransform == 1, ]
  552. headNotTransformTable <- headTable[needTransform == 0, ]
  553.  
  554. # . ,
  555. headTransform_NA_inn <- headTransformTable[is.na(inn_head), ]
  556. headTransform_notNA_inn <- headTransformTable[!is.na(inn_head), ]
  557.  
  558. # .
  559. countINN_byFioRegion <- headTransform_notNA_inn[, .(countINN_byFioRegion = length(inn_head)), by = .(fio, region_comp)]
  560. countINN_byFioRegion <- merge(countINN_byFioRegion, headTransform_NA_inn[,.(fio, region_comp, count_comp)],
  561. by = c("fio", "region_comp"), all.x = T, all.y = F)
  562. countINN_byFioRegion[is.na(count_comp), count_comp := 0]
  563. countINN_byFioRegion[, rate_comp := count_comp/(countINN_byFioRegion + 2)]
  564. countINN_byFioRegion[region_comp == '77', rate_comp := count_comp/(countINN_byFioRegion + 5)]
  565.  
  566. # .
  567. headTransform_notNA_inn <- merge(headTransform_notNA_inn, countINN_byFioRegion[, .(fio, region_comp, rate_comp)],
  568. by = c("fio", "region_comp"), all.x = T, all.y = F)
  569. headTransform_notNA_inn[, count_comp_all := round(count_comp + rate_comp)]
  570.  
  571. # . - .
  572. medianCountComp_byFioRegion <- headTransform_notNA_inn[, .(medianCountComp = round(median(count_comp_all))), by = .(fio, region_comp)]
  573. headTransform_NA_inn <- merge(headTransform_NA_inn, medianCountComp_byFioRegion, by = c("fio", "region_comp"), all.x = T, all.y = F)
  574. headTransform_NA_inn[, count_comp := as.numeric(count_comp)] #
  575. headTransform_NA_inn[is.na(medianCountComp), medianCountComp := count_comp]
  576. setnames(headTransform_NA_inn, old = "medianCountComp", new = "count_comp_all")
  577.  
  578. # . -
  579. setnames(headNotTransformTable, old = "count_comp", new = "count_comp_all")
  580.  
  581. allCompanies <- rbindlist(list(headNotTransformTable[, .(fio, inn_head, region_comp, count_comp_all)]
  582. ,headTransform_NA_inn[, .(fio, inn_head, region_comp, count_comp_all)]
  583. ,headTransform_notNA_inn[, .(fio, inn_head, region_comp, count_comp_all)])
  584. ,use.names = T, fill = F)
  585. allCompanies[is.na(inn_head), inn_head := '-1']
  586. allCompanies <- allCompanies[, .(countComp_byFioINN = sum(count_comp_all)), by = .(fio, inn_head)]
  587.  
  588. #
  589. headTableAll[is.na(inn_head), inn_head := '-1']
  590. headTableAll <- merge(headTableAll, allCompanies, by = c("fio", "inn_head"), all.x = T, all.y = F)
  591. headTableAll[is.na(countComp_byFioINN), countComp_byFioINN := 0]
  592. massFounderT <- headTableAll[, .(mass_founder = max(countComp_byFioINN)), ogrn]
  593.  
  594. return(massFounderT)
  595. }
  596.  
  597. create_gos_organization <- function() {
  598. customer_sql = "
  599. select distinct
  600. ogrn ogrn
  601. ,1 is_customer
  602. FROM [RegistryDicts].[dbo].[ContractsStatistics]
  603. where FirmType = 1
  604. "
  605. customerT <- data.table(sqlQuery(virt155, customer_sql, as.is = 1))
  606.  
  607. okfs_sql = "
  608. select distinct
  609. ul.ogrn ogrn
  610. ,okfs.okfs okfs
  611. from Egrul.dbo.UL_First as ul
  612. left join [EGRPO].[dbo].[EgrpoTable] as et on ul.ogrnID=et.ogrn
  613. left join [EGRPO].[dbo].[RefOkfsTable] as okfs on et.okfsID=okfs.ID
  614. inner join (
  615. SELECT ogrn, min(IDOKPO) as id
  616. FROM [EGRPO].[dbo].[EgrpoTable]
  617. where is_first=1
  618. and filial=0
  619. group by ogrn
  620. ) et_id on et.IDOKPO=et_id.id --- ,
  621. where okfs.okfs in (11,12,13,14,40,41,42,43)
  622. "
  623. okfsT <- data.table(sqlQuery(virt155, okfs_sql, as.is = 1))
  624.  
  625. uchrGos_sql <- "
  626. select
  627. ul.ogrn ogrn
  628. ,COUNT(distinct gos_uchr.id) gos_uchr
  629. from Egrul.dbo.UL_First as ul
  630. join [Egrul].dbo.UCHR_RFSUBMO gos_uchr with(nolock) on ul.idul=gos_uchr.idul
  631. group by ul.ogrn
  632. "
  633. uchrGos <- data.table(sqlQuery(virt155, uchrGos_sql, as.is = 1))
  634.  
  635. statusOrg_sql = "
  636. select
  637. ogrn ogrn
  638. ,inn inn_gos_org
  639. ,firmStatus.Name status_gos_org
  640. from Egrul.dbo.UL_First as ul
  641. join [RegistryDicts].[dbo].[RefEgrulStatus2FirmStatusTable] as classif on classif.RefEgrulStatusID = ul.STATUS_ID
  642. join [RegistryDicts].[dbo].[RefFirmStatusTable] as firmStatus on firmStatus.ID = classif.RefFirmStatusID
  643. where firmStatus.ID in (1,2,6,7,8,11)
  644. "
  645. statusOrg <- data.table(sqlQuery(virt155, statusOrg_sql, as.is = c(1,2,3)))
  646.  
  647. gosOrg = merge(customerT, okfsT, by = "ogrn", all.x = T, all.y = T)
  648. gosOrg = merge(gosOrg, uchrGos, by = "ogrn", all.x = T, all.y = T)
  649. gosOrg[, isGosOrg := 1]
  650. gosOrg = merge(gosOrg, statusOrg, by = "ogrn", all.x = F, all.y = F)
  651. return(gosOrg)
  652. }
  653.  
  654. calcul.index <- function(dataset_comp) {
  655. load("D:/kosolapov.s/OneDayFirm/models/randomForest_oneDay_old.RData")
  656. load("D:/kosolapov.s/OneDayFirm/models/randomForest_oneDay_new.RData")
  657. dataset_comp$prob_old <- predict(randomForest_oneDay_old, newdata = dataset_comp, type="response")
  658. dataset_comp$prob_new <- predict(randomForest_oneDay_new, newdata = dataset_comp, type="response")
  659. dataset_comp[, prob:= (1/3)*prob_old + (2/3)*prob_new]
  660. dataset_comp[, index:= round(prob*100)]
  661. dataset_comp[, "prob" := NULL]
  662.  
  663. dataset_comp[, index_res := index]
  664. dataset_comp[ branch > 0, index_res := 80 + 2*(index_res/10)]
  665. dataset_comp[ agency > 0, index_res := 80 + 2*(index_res/10)]
  666. dataset_comp[ guaranContrCnt > 0, index_res := 80 + 2*(index_res/10)]
  667. dataset_comp[ (is_importer + is_exporter) > 0, index_res := 60 + 4*(index_res/10)]
  668. dataset_comp[ domain > 0, index_res := 40 + 6*(index_res/10)]
  669. dataset_comp[ CntTradeMarks > 0, index_res := 40 + 6*(index_res/10)]
  670. dataset_comp[ opf == ' ', index_res := 60 + 4*(index_res/10)]
  671. dataset_comp[ opf == ' ', index_res := 50 + 5*(index_res/10)]
  672. dataset_comp[ is_in_rnp_current > 0, index_res := index_res/2]
  673. dataset_comp[ is_in_rnp_current == 0 & is_in_rnp > 0 , index_res := index_res/1.5]
  674. dataset_comp[ is_disqualifiedPerson > 0 , index_res := index_res/2]
  675.  
  676. ## .
  677. gosOrg <- create_gos_organization()
  678. dataset_comp <- merge(dataset_comp, gosOrg, by = "ogrn", all.x = T, all.y = T)
  679. dataset_comp[is.na(isGosOrg), isGosOrg := 0]
  680. dataset_comp[isGosOrg == 1, index_res := 100 ]
  681. dataset_comp[is.na(inn), inn := inn_gos_org]
  682.  
  683. ##
  684. dataset_comp[is.na(status), status := status_gos_org]
  685. dataset_comp[, c("inn_gos_org", "status_gos_org") := NULL]
  686. dataset_comp[ status == ' ', index_res := index_res*(3/4)]
  687. dataset_comp[ status == ' ', index_res := index_res/2]
  688.  
  689. ##################################################
  690. ### ###
  691. ##################################################
  692. dataset_comp$index_res[dataset_comp$ogrn == '1137746216728' & dataset_comp$index_res < 83] <- 83
  693. dataset_comp$index_res[dataset_comp$ogrn == '1082703005219' & dataset_comp$index_res < 96] <- 96
  694. dataset_comp$index_res[dataset_comp$ogrn == '1127847581058' & dataset_comp$index_res < 85] <- 85
  695.  
  696.  
  697. ogrn_our <- c('5117746056820','5137746238823','5137746234456','1140280000640','5137746238812','1077763929737','5137746236964','5117746061054',
  698. '1127746077227','1035205407181','1105260001241','1105260001175','1077799012928','1110200000481','1075262014552','1127746077216',
  699. '1127746077249','1167746413999', '1177746957960')
  700.  
  701. dataset_comp[ ogrn %in% ogrn_our, index_res := 75 + 1.5*(index_res/10)]
  702.  
  703. #################################################
  704. dataset_comp[, index_res := round(index_res)]
  705.  
  706. return(dataset_comp)
  707. }
  708.  
  709. create_bankruptcy <- function() {
  710. bankruptcy_sql <- "
  711. select distinct
  712. ogrn ogrn
  713. ,1 is_bankruptcy
  714. from [Bankruptcy].[dbo].[MessageTable]
  715. where ogrn is not null
  716. "
  717. bankruptcyT <- data.table(sqlQuery(virt155, bankruptcy_sql, as.is = 1))
  718. return(bankruptcyT)
  719. }
  720.  
  721. changeBase <- function(indexT) {
  722. indexBaseT <- sqlQuery(virt304, "
  723. select
  724. ogrn
  725. ,index_reliability
  726. ,is_active
  727. from Egrul.dbo.indexReliability
  728. ", as.is = 1)
  729.  
  730. indexT <- merge(indexT, indexBaseT, by = "ogrn", all = T)
  731.  
  732. insertT <- indexT[is.na(index_reliability),]
  733. deleteT <- indexT[is_active == 1 & is.na(index), ]
  734. changeT <- indexT[(index != index_reliability) | (is_active == 0 & !is.na(index)) , ]
  735.  
  736. if(nrow(insertT) > 0) {
  737. insertT[, sql_insert := paste("insert into Egrul.dbo.indexReliability values (", ogrn, ",", index, ", NULL, 1,", ogrn, ", GETDATE())", sep = "")]
  738. for (i in 1:nrow(insertT)) {
  739. sqlQuery(virt304, insertT$sql_insert[i])
  740. }
  741. }
  742.  
  743. if(nrow(deleteT) > 0) {
  744. deleteT[, sql_delete := paste("update Egrul.dbo.indexReliability set changeDate = GETDATE(), is_active = 0 where ogrnID = ", ogrn, sep = " ")]
  745. for (i in 1:nrow(deleteT)) {
  746. sqlQuery(virt304, deleteT$sql_delete[i])
  747. }
  748. }
  749.  
  750. if(nrow(changeT) > 0) {
  751. changeT[, sql_update := paste("update Egrul.dbo.indexReliability set changeDate = GETDATE(), is_active = 1, index_reliability =",
  752. index, "where ogrnID =", ogrn, sep = " ")]
  753. for (i in 1:nrow(changeT)) {
  754. sqlQuery(virt304, changeT$sql_update[i])
  755. }
  756. }
  757. }
  758.  
  759. main <- function() {
  760. data <- calcul.dataset()
  761. MassHead <- MassHeadFunction()
  762. MassFounder <- MassFounderFunction()
  763. # bankruptcyT <- create_bankruptcy()
  764.  
  765. # data <- data[!(ogrn %in% bankruptcyT$ogrn), ]
  766. data <- merge(data, MassHead, by.x = "ogrn", by.y = "ogrn", all.x = T, all.y = F)
  767. data <- merge(data, MassFounder, by.x = "ogrn", by.y = "ogrn", all.xч Б- = T, all.y = F)
  768. data$mass_head[is.na(data$mass_head)] <- 4.5
  769. data$mass_founder[is.na(data$mass_founder)] <- 0
  770. write.csv(data, row.names = FALSE, "probability_extra_data.csv")
  771. return(data)
  772. }
  773. ##############################################################################
  774.  
  775. data <- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement