Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(RODBC)
- library(data.table)
- virt155 <- odbcConnect("virt155", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
- virt153 <- odbcConnect("Virt153", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
- DBSeldonAnalitic17 <- odbcConnect("DBSeldonAnalitic17", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
- virt94 <- odbcConnect("Virt94", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
- virt304 <- odbcConnect("virt304", uid = "AETP\\popova.a", pwd = "WiW7RWfbQW")
- calcul.dataset <- function(flagCalculForAll = 1, ogrn = 0) {
- if(flagCalculForAll == 0) {
- sql_ogrn = paste("'",paste(ogrn, collapse = "','"),"'",sep="")
- templace.end <- paste("
- left join Egrul.dbo.RefFirmNameTable as nameT with(nolock) on ul.prs_N_P = nameT.Obj
- left join [Egrul].[dbo].[OPF] as opf on ul.NAME_OPF_ID=opf.id
- left join [RegistryDicts].[dbo].[RefOPFDICS] as opfdict on opf.opfdics=opfdict.id
- join [RegistryDicts].[dbo].[RefEgrulStatus2FirmStatusTable] as classif on classif.RefEgrulStatusID = ul.STATUS_ID
- join [RegistryDicts].[dbo].[RefFirmStatusTable] as firmStatus on firmStatus.ID = classif.RefFirmStatusID
- where nameT.Lang = 1 and ul.ogrn in (
- ", sql_ogrn, ")", sep = "")
- }
- if(flagCalculForAll == 1) {
- templace.end <- "
- left join Egrul.dbo.RefFirmNameTable as nameT with(nolock) on ul.prs_N_P = nameT.Obj
- left join [Egrul].[dbo].[OPF] as opf on ul.NAME_OPF_ID=opf.id
- left join [RegistryDicts].[dbo].[RefOPFDICS] as opfdict on opf.opfdics=opfdict.id
- join [RegistryDicts].[dbo].[RefEgrulStatus2FirmStatusTable] as classif on classif.RefEgrulStatusID = ul.STATUS_ID
- join [RegistryDicts].[dbo].[RefFirmStatusTable] as firmStatus on firmStatus.ID = classif.RefFirmStatusID
- where firmStatus.ID in (1,2,6,7,8,11) and nameT.Lang = 1 and (opfdict.NAME in (' ',' ',' ',
- ' ',' ', ' ()') or nameT.Name like ('% %') or nameT.Name like
- ('% %'))
- "
- }
- egrul_main <- "
- select
- ul.OGRN ogrn
- ,ul.INN inn
- ,ul.CAPITAL_SUMMA equity
- ,ul.start_dtreg start_reg
- ,ul.finish_dtreg finish_reg
- ,opfdict.NAME opf
- ,case when nameT.Name like ('% %') then ' '
- when (nameT.Name like ('% %') or nameT.Name like ('% %'))
- then ' '
- when (nameT.Name like ('% %') or nameT.Name like ('% %'))
- then ' '
- else '' end opf_from_name
- ,firmStatus.Name [status]
- from Egrul.dbo.UL_First as ul
- "
- data <- data.table(sqlQuery(virt155, paste(egrul_main, templace.end), as.is = c(1,2,4,5,6,7,8)))
- ##### -
- doch.start <- "
- select
- ul.ogrn ogrn
- ,COUNT(distinct linkT.id) subsid
- from Egrul.dbo.UL_First as ul
- left join Egrul.dbo.Basis_LinksTable as linkT on linkT.Donor = ul.ogrn
- "
- dochT <- data.table(sqlQuery(virt155, paste(doch.start, templace.end, " and LinkType=1 group by ul.ogrn"), as.is = 1))
- ##### -
- filial.start <- "
- select
- ul.ogrn ogrn
- ,COUNT(distinct linkT.id) branch
- from Egrul.dbo.UL_First as ul
- left join Egrul.dbo.Basis_LinksTable as linkT on linkT.Donor = ul.ogrn
- "
- filialT <- data.table(sqlQuery(virt155, paste(filial.start, templace.end, "and LinkType=10 group by ul.ogrn"), as.is = 1))
- ##### -
- agency.start <- "
- select
- ul.ogrn ogrn
- ,COUNT(distinct linkT.id) agency
- from Egrul.dbo.UL_First as ul
- left join Egrul.dbo.Basis_LinksTable as linkT on linkT.Donor = ul.ogrn
- "
- agencyT <- data.table(sqlQuery(virt155, paste(agency.start, templace.end, "and LinkType=11 group by ul.ogrn"), as.is = 1))
- ### ==
- uch_head.start <- "
- select distinct
- ul.ogrn ogrn
- ,1 founder_head
- from [Egrul].dbo.ul_first ul with(nolock)
- left join [Egrul].dbo.[UCHR_UCHRFL] uc with(nolock) on ul.idul=uc.idul
- left join [Egrul].dbo.[DOLGNFL] d with(nolock) on ul.IDUL=d.IDUL
- "
- uch_headT <- data.table(sqlQuery(virt155, paste(uch_head.start, templace.end, "and uc.FLID1=d.FLID1"), as.is = 1))
- #####
- okved.start <- "
- select
- ul.ogrn ogrn
- ,count(okvedT.KOD_OKVED) count_okved
- from [Egrul].dbo.UL_First as ul with(nolock)
- left join [Egrul].dbo.UL_OKVED_First as okvedT with(nolock) on okvedT.IDUL = ul.IDUL
- "
- okvedT <- data.table(sqlQuery(virt155, paste(okved.start, templace.end, "group by ul.ogrn"), as.is = 1))
- #####
- contractsT44 <- data.table(sqlQuery( DBSeldonAnalitic17,
- "
- select distinct
- partT.inn inn
- ,1 is_contract44
- from OOS.dbo.ContractTable as ct with(nolock)
- inner join OOS.dbo.applicationParticipantLinkTable as apPartT with(nolock) on apPartT.applicationParticipantGUIDLink=ct.GUID
- inner join OOS.dbo.ParticipantTable as PartT with(nolock) on PartT.GUID=apPartT.participantLink
- ", as.is= 1))
- contractsT223 <- data.table(sqlQuery( DBSeldonAnalitic17,
- "
- select distinct
- partT.inn inn
- ,1 is_contract223
- from [223FZOOS].[dbo].[ContractParticipant] as ct
- join [223FZOOS].dbo.participantTable as partT ON ct.ParticipantTableGUID=partT.GUID
- ", as.is= 1))
- #####
- participT44 <- data.table(sqlQuery(DBSeldonAnalitic17,
- "
- select distinct
- inn
- ,1 is_particip44
- from OOS.dbo.ParticipantTable as partT with(nolock)
- ", as.is = 1))
- participT223 <- data.table(sqlQuery(DBSeldonAnalitic17,
- "
- select distinct
- inn
- ,1 is_particip223
- from [223FZOOS].[dbo].[participantTable]
- ", as.is = 1))
- #### 2014
- balances2014 <- data.table(sqlQuery(virt155, "
- select distinct
- compT.ogrn ogrn
- ,1 is_fin_report2014
- from Balance.dbo.B2015 balT
- left join Balance.dbo.Company as compT on compT.CompanyID = balT.CompanyID
- ", as.is = 1))
- #### 2015
- balances2015 <- data.table(sqlQuery(virt155, "
- select distinct
- compT.ogrn ogrn
- ,1 is_fin_report2015
- from Balance.dbo.B2016 balT
- left join Balance.dbo.Company as compT on compT.CompanyID = balT.CompanyID
- ", as.is = 1))
- #####
- massAdress.start <- "
- select
- ul.ogrn ogrn
- ,massT.[count] mass_adress
- from Egrul.dbo.UL_First as ul
- left join Egrul.dbo.AdressTable as adressT on adressT.ID = ul.AdressID
- left join Egrul.stat.MassAdressTable as massT on massT.OSMID = adressT.OSMId
- "
- massAdressT <- data.table(sqlQuery(virt155, paste(massAdress.start, templace.end), as.is = 1))
- ### . .
- uchrRFL.sql <- "
- select
- ul.OGRN ogrn
- ,count(distinct fl.ID) rfl
- from [Egrul].dbo.UL_First as ul
- join [Egrul].dbo.[uchr_uchrfl] fl with(nolock) on ul.idul=fl.idul
- "
- uchrRFL <- data.table(sqlQuery(virt155, paste(uchrRFL.sql, templace.end, "group by ul.ogrn"), as.is = 1))
- ### .
- uchrRUL.sql <- "
- select
- ul.OGRN ogrn
- ,count(distinct rul.ID) rul
- from [Egrul].dbo.UL_First as ul
- join [Egrul].dbo.UCHR_RUL rul with(nolock) on ul.idul=rul.idul
- "
- uchrRUL <- data.table(sqlQuery(virt155, paste(uchrRUL.sql, templace.end, "group by ul.ogrn"), as.is = 1))
- ### .
- uchrIUL.sql <- "
- select
- ul.OGRN ogrn
- ,count(distinct iul.ID) iul
- from [Egrul].dbo.UL_First as ul
- join [Egrul].dbo.UCHR_IUL iul with(nolock) on ul.idul=iul.idul
- "
- uchrIUL <- data.table(sqlQuery(virt155, paste(uchrIUL.sql, templace.end, "group by ul.ogrn"), as.is = 1))
- ###
- uchrGos.sql <- "
- select
- ul.ogrn ogrn
- ,COUNT(distinct gos_uchr.id) gos_uchr
- from Egrul.dbo.UL_First as ul
- join [Egrul].dbo.UCHR_RFSUBMO gos_uchr with(nolock) on ul.idul=gos_uchr.idul
- "
- uchrGos <- data.table(sqlQuery(virt155, paste(uchrGos.sql, templace.end, "group by ul.ogrn"), as.is = 1))
- #####
- domT <- data.table(sqlQuery(virt155,"
- select distinct
- domainT.OGRN ogrn
- ,1 domain
- from [Egrul].[dbo].[SiteProcessing] as domainT
- ", as.is = 1))
- ####
- # ecpT <- data.table(unique(read.table("D:/kosolapov.s/OneDayFirm/data/ecp.csv", col.names = c("inn"), quote="\"",
- # colClasses = "character", skip = 1)))
- # ecpT$is_ecp <- 1
- ecpT <- data.table(sqlQuery(virt94, "
- select distinct
- erpt.INN inn
- ,1 is_ecp
- from crm_3_work.dbo.custom_ecpRequestPublishedTable as erpt
- where erpt.State = 2 and INN is not null and INN not in ('','0000000000')
- ", as.is = 1))
- ###
- stats_egrul_sql <- "
- select
- ul.OGRN ogrn
- ,[Balance] countBalances
- ,License license
- ,WithoutTaxRecoding
- ,sign([BadProducers]) is_in_rnp
- ,[CntTradeMarks]
- ,[guaranContrCnt]
- ,[export]
- ,[isPVI]
- ,[isREK]
- ,Import is_importer
- from Egrul.dbo.UL_First as ul
- join [RegistryDicts].[dbo].[FirmStatTable] as statT on statT.OGRN = ul.ogrn
- "
- stats_egrul <- data.table(sqlQuery(virt155, paste(stats_egrul_sql, templace.end), as.is = 1))
- ###
- main_okved.sql <- "
- SELECT
- ul.[OGRN] ogrn
- ,isnull(okved2.code,okvClassif.Okved2Code) okved2_main
- FROM [Egrul].[dbo].[UL_first] as ul
- inner join [Egrul].[dbo].UL_OKVED_First as ok1 on ul.IDUL=ok1.IDUL and ok1.MAIN = 1
- left join [RegistryDicts].[dbo].[RefOkved2] as okved2 on ok1.KOD_OKVED=okved2.code
- left join [RegistryDicts].[dbo].[OkvedOkved2] as okvClassif on okvClassif.OkvedCode = ok1.KOD_OKVED
- "
- main_okvedT <- data.table(sqlQuery(virt155, paste(main_okved.sql, templace.end), as.is = 1))
- ###
- msp.sql <- "
- SELECT
- ul.[Ogrn] ogrn
- ,[CategoryID] mspType
- from Egrul.dbo.ul_first as ul
- join [Seldon2Pro].[dbo].[Msp] as mspT on mspT.ogrn = ul.ogrn
- "
- mspT <- data.table(sqlQuery(virt155, paste(msp.sql, templace.end), as.is = 1))
- ####
- rnp_currentT <- data.table(sqlQuery(DBSeldonAnalitic17, "
- select distinct
- badT.INN inn
- ,1 is_in_rnp_current
- from [Services].dbo.FasProducerBadTable as badT
- where GETDATE() < isnull([ExcludeDate], DATEADD(year,2,ArchiveDate))
- ", as.is = 1))
- ###
- DisqualifiedPersonT <- data.table(sqlQuery(virt155, "
- select distinct
- ul.ogrn
- ,1 is_disqualifiedPerson
- from DisqualifiedPerson.dbo.RegNoteTable as rnt
- join [DisqualifiedPerson].[dbo].[PersonTable] as pt on rnt.person=pt.GUID
- join Egrul.dbo.RefFL1 as fl1 on fl1.INN = pt.inn
- join [Egrul].dbo.[DOLGNFL] d on d.FLid1 = fl1.ID
- join Egrul.dbo.UL_First as ul on ul.IDUL=d.IDUL
- where GETDATE() between rnt.startDate and rnt.endDate
- ", as.is = 1))
- #####
- certificate.start <- "
- select distinct
- ul.ogrn ogrn,
- count (re.ser_sv) [certificate]
- from [Egrul].dbo.[UL_first] ul with(nolock)
- left join [Egrul].dbo.[REGEGRUL] re with(nolock) on ul.idul=re.idul
- "
- certificateT <- data.table(sqlQuery(virt155, paste(certificate.start, templace.end, "group by ul.ogrn" ), as.is = 1))
- #####
- arbT <- data.table(sqlQuery(virt155,
- "
- select distinct
- Ogrn ogrn
- ,[Type] typeArb
- from [ARB].[dbo].[Statistics]
- ", as.is = c(1,2)))
- arbT[, responder_binary := as.numeric(typeArb == 'D')]
- arbT[, claimant_binary := as.numeric(typeArb == 'P')]
- arbT[, third_person_binary := as.numeric(typeArb %in% c('O','T'))]
- arbT[, 'typeArb':= NULL]
- ###
- balances_sql <- "
- select
- statT.OGRN ogrn
- ,statT.[Year] [year]
- ,profit profit
- ,revenue revenue
- ,net_profit net_profit
- ,isnull(ul.CAPITAL_SUMMA, 0) equity
- FROM [Balance].[dbo].[CompanyStatTable] as statT
- join Egrul.dbo.UL_First as ul on ul.ogrn = statT.OGRN
- where profit <= 20000 and isnull(ul.CAPITAL_SUMMA, 0) <= 20000 and revenue < 1 and net_profit < 1
- "
- balancesT <- data.table(sqlQuery(virt155, balances_sql, as.is = 1), key = "ogrn")
- null_balances2015 <- data.table(ogrn = balancesT$ogrn[balancesT$year == 2016], is_null_fin_report2015 = 1, key = "ogrn")
- null_balances2014 <- data.table(ogrn = balancesT$ogrn[balancesT$year == 2015], is_null_fin_report2014 = 1, key = "ogrn")
- countNullBalances <- balancesT[, .N, ogrn]
- setnames(countNullBalances, "N", "count_null_balances")
- ###
- auditT_sql <- "
- select distinct
- orgT.Ogrn ogrn
- ,1 is_audit
- from [AuditUnPlan].[dbo].[NewAudit] as auditT
- left join [AuditUnPlan].[dbo].[Organization] as orgT on orgT.OrganizationID = auditT.[CheckOrgID]
- left join [AuditUnPlan].[dbo].[Result] as resultT on resultT.[AuditGUID] = auditT.GUID
- left join [AuditUnPlan].[dbo].[Violations] as vT with(nolock) on resultT.guid=vT.ResultGUID
- where resultT.ResultID is not null and vT.ViolationsID is null
- "
- auditT <- data.table(sqlQuery(virt153, auditT_sql, as.is = 1))
- ######################################################################################################
- ###
- data[opf == ' ', opf:= ' ']
- data[opf == ' ', opf:= ' ']
- data[!opf %in% c(' ',' ',' '), opf := opf_from_name]
- data[,start_reg := as.Date(start_reg, "%Y-%m-%d")]
- data[,finish_reg := as.Date(finish_reg, "%Y-%m-%d")]
- data[is.na(finish_reg), finish_reg := Sys.Date()]
- data[, age := as.numeric(round((finish_reg - start_reg)/30))]
- #
- uchrT <- merge(uchrRFL, uchrRUL, by = "ogrn", all.x = T, all.y = T)
- uchrT <- merge(uchrT, uchrIUL, by = "ogrn", all.x = T, all.y = T)
- uchrT <- merge(uchrT, uchrGos, by = "ogrn", all.x = T, all.y = T)
- uchrT[is.na(rfl), rfl := 0]
- uchrT[is.na(rul), rul := 0]
- uchrT[is.na(iul), iul := 0]
- uchrT[is.na(gos_uchr), gos_uchr := 0]
- uchrT[, founderAll := rfl + rul + iul + gos_uchr]
- uchrT <- uchrT[,.(ogrn,founderAll)]
- #
- main_okvedT[, okved2_main := substring(okved2_main, 1, 2)]
- main_okvedT[, is_main_okved46 := as.numeric(okved2_main == 46)]
- main_okvedT <- main_okvedT[, .(is_main_okved46 = max(is_main_okved46)), ogrn]
- stats_egrul[, is_exporter := export + isPVI + isREK]
- mspT[, is_msp := sign(mspType)]
- arbT <- arbT[, lapply(.SD, max), by = "ogrn" ]
- data <- merge(data, dochT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, filialT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, agencyT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, uch_headT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, contractsT44, by = "inn", all.x = T, all.y = F)
- data <- merge(data, participT44, by = "inn", all.x = T, all.y = F)
- data <- merge(data, contractsT223, by = "inn", all.x = T, all.y = F)
- data <- merge(data, participT223, by = "inn", all.x = T, all.y = F)
- data <- merge(data, balances2015, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, balances2014, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, ecpT, by = "inn", all.x = T, all.y = F)
- data <- merge(data, massAdressT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, uchrT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, mspT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, domT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, main_okvedT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, stats_egrul, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, okvedT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, rnp_currentT, by = "inn", all.x = T, all.y = F)
- data <- merge(data, DisqualifiedPersonT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, arbT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, certificateT, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, null_balances2015, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, null_balances2014, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, countNullBalances, by = "ogrn", all.x = T, all.y = F)
- data <- merge(data, auditT, by = "ogrn", all.x = T, all.y = F)
- data[is.na(equity), equity := 0]
- data[is.na(subsid), subsid := 0]
- data[is.na(branch), branch := 0]
- data[is.na(agency), agency := 0]
- data[is.na(founder_head), founder_head := 0]
- data[is.na(is_contract44), is_contract44 := 0]
- data[is.na(is_particip44), is_particip44 := 0]
- data[is.na(is_contract223), is_contract223 := 0]
- data[is.na(is_particip223), is_particip223 := 0]
- data[is.na(is_fin_report2015), is_fin_report2015 := 0]
- data[is.na(is_fin_report2014), is_fin_report2014 := 0]
- data[is.na(is_null_fin_report2015), is_null_fin_report2015 := 0]
- data[is.na(is_null_fin_report2014), is_null_fin_report2014 := 0]
- data[is.na(count_null_balances), count_null_balances := 0]
- data[is.na(is_ecp), is_ecp := 0]
- data[is.na(mass_adress), mass_adress := 1]
- data[is.na(founderAll), founderAll := 0]
- data[is.na(is_msp), is_msp := 0]
- data[is.na(count_okved), count_okved := 0]
- data[is.na(is_in_rnp_current), is_in_rnp_current := 0]
- data[is.na(is_disqualifiedPerson), is_disqualifiedPerson := 0]
- data[is.na(domain), domain := 0]
- data[is.na(responder_binary), responder_binary := 0]
- data[is.na(claimant_binary), claimant_binary := 0]
- data[is.na(third_person_binary), third_person_binary := 0]
- data[is.na(is_main_okved46), is_main_okved46 := 0]
- data[is.na(is_audit), is_audit := 0]
- data[, is_particip := sign(is_particip44 + is_particip223)]
- data[, is_contract := sign(is_contract44 + is_contract223)]
- data[, is_particip_oos := sign(is_particip + is_contract)]
- data[,':='(age_new = 1)][age > 70 & age <= 90, age_new := 2][age > 90 & age <= 110, age_new := 3][age > 110 & age <= 140, age_new := 4][age > 140 & age <= 180, age_new := 5][age > 180, age_new := 6]
- data[, c("start_reg","finish_reg","opf_from_name","is_contract44","is_particip44","is_contract223","is_particip223","mspType","export","isPVI","isREK") := NULL]
- return(data)
- }
- MassHeadFunction <- function() {
- ###
- headTableAll <- data.table(sqlQuery(virt155, "
- select distinct
- firstname firstName
- ,middlename middleName
- ,lastname lastName
- ,fl1.INN inn_head
- ,ul.OGRN ogrn
- from Egrul.dbo.UL_First as ul
- left join [Egrul].dbo.[DOLGNFL] d with(nolock) on ul.IDUL=d.IDUL
- left join Egrul.dbo.RefFL1 as fl1 on d.FLid1 = fl1.ID
- left join Egrul.dbo.RefFIO as fio on fio.ID = fl1.FIOid
- ", as.is = c(1,2,3,4,5)))
- headTableAll[, fio := paste(firstName, middleName, lastName, sep = " ")]
- headTableAll[, region_comp := substring(ogrn, 4, 5)]
- ### . -
- headTable <- headTableAll[, .(count_comp = length(ogrn)), by = .(fio, inn_head, region_comp)]
- ### . ,
- headTable[, ':='(is_null_inn = 0)][is.na(inn_head), is_null_inn := 1]
- needTransformT <- headTable[, .(needTransform = max(is_null_inn)), fio]
- headTable <- merge(headTable, needTransformT, by = "fio")
- headTransformTable <- headTable[needTransform == 1, ]
- headNotTransformTable <- headTable[needTransform == 0, ]
- # . ,
- headTransform_NA_inn <- headTransformTable[is.na(inn_head), ]
- headTransform_notNA_inn <- headTransformTable[!is.na(inn_head), ]
- # .
- countINN_byFioRegion <- headTransform_notNA_inn[, .(countINN_byFioRegion = length(inn_head)), by = .(fio, region_comp)]
- countINN_byFioRegion <- merge(countINN_byFioRegion, headTransform_NA_inn[,.(fio, region_comp, count_comp)],
- by = c("fio", "region_comp"), all.x = T, all.y = F)
- countINN_byFioRegion[is.na(count_comp), count_comp := 0]
- countINN_byFioRegion[, rate_comp := count_comp/(countINN_byFioRegion + 2)]
- countINN_byFioRegion[region_comp == '77', rate_comp := count_comp/(countINN_byFioRegion + 5)]
- # .
- headTransform_notNA_inn <- merge(headTransform_notNA_inn, countINN_byFioRegion[, .(fio, region_comp, rate_comp)],
- by = c("fio", "region_comp"), all.x = T, all.y = F)
- headTransform_notNA_inn[, count_comp_all := round(count_comp + rate_comp)]
- # . - .
- medianCountComp_byFioRegion <- headTransform_notNA_inn[, .(medianCountComp = round(median(count_comp_all))), by = .(fio, region_comp)]
- headTransform_NA_inn <- merge(headTransform_NA_inn, medianCountComp_byFioRegion, by = c("fio", "region_comp"), all.x = T, all.y = F)
- headTransform_NA_inn[, count_comp := as.numeric(count_comp)] #
- headTransform_NA_inn[is.na(medianCountComp), medianCountComp := count_comp]
- setnames(headTransform_NA_inn, old = "medianCountComp", new = "count_comp_all")
- # . -
- setnames(headNotTransformTable, old = "count_comp", new = "count_comp_all")
- allCompanies <- rbindlist(list(headNotTransformTable[, .(fio, inn_head, region_comp, count_comp_all)]
- ,headTransform_NA_inn[, .(fio, inn_head, region_comp, count_comp_all)]
- ,headTransform_notNA_inn[, .(fio, inn_head, region_comp, count_comp_all)])
- ,use.names = T, fill = F)
- allCompanies[is.na(inn_head), inn_head := '-1']
- allCompanies <- allCompanies[, .(countComp_byFioINN = sum(count_comp_all)), by = .(fio, inn_head)]
- #
- headTableAll[is.na(inn_head), inn_head := '-1']
- headTableAll <- merge(headTableAll, allCompanies, by = c("fio", "inn_head"), all.x = T, all.y = F)
- headTableAll[is.na(countComp_byFioINN), countComp_byFioINN := 0]
- massHeadT <- headTableAll[, .(mass_head = max(countComp_byFioINN)), ogrn]
- return(massHeadT)
- }
- MassFounderFunction <- function() {
- ### ,
- ###
- headTableAll <- data.table(sqlQuery(virt155, "
- select
- firstname firstName
- ,middlename middleName
- ,lastname lastName
- ,flT.INN inn_head
- ,ul.OGRN ogrn
- from [Egrul].dbo.UL_First as ul
- left join [Egrul].dbo.uchr_uchrfl uc on ul.idul=uc.idul
- left join [Egrul].dbo.RefFL1 flT with(nolock) on flT.id=uc.FLID1
- left join Egrul.dbo.RefFIO as fio on fio.ID = flT.FIOid
- ", as.is = c(1,2,3,4,5)))
- headTableAll[, fio := paste(firstName, middleName, lastName, sep = " ")]
- headTableAll[, region_comp := substring(ogrn, 4, 5)]
- ### . -
- headTable <- headTableAll[, .(count_comp = length(ogrn)), by = .(fio, inn_head, region_comp)]
- ### . ,
- headTable[, ':='(is_null_inn = 0)][is.na(inn_head), is_null_inn := 1]
- needTransformT <- headTable[, .(needTransform = max(is_null_inn)), fio]
- headTable <- merge(headTable, needTransformT, by = "fio")
- headTransformTable <- headTable[needTransform == 1, ]
- headNotTransformTable <- headTable[needTransform == 0, ]
- # . ,
- headTransform_NA_inn <- headTransformTable[is.na(inn_head), ]
- headTransform_notNA_inn <- headTransformTable[!is.na(inn_head), ]
- # .
- countINN_byFioRegion <- headTransform_notNA_inn[, .(countINN_byFioRegion = length(inn_head)), by = .(fio, region_comp)]
- countINN_byFioRegion <- merge(countINN_byFioRegion, headTransform_NA_inn[,.(fio, region_comp, count_comp)],
- by = c("fio", "region_comp"), all.x = T, all.y = F)
- countINN_byFioRegion[is.na(count_comp), count_comp := 0]
- countINN_byFioRegion[, rate_comp := count_comp/(countINN_byFioRegion + 2)]
- countINN_byFioRegion[region_comp == '77', rate_comp := count_comp/(countINN_byFioRegion + 5)]
- # .
- headTransform_notNA_inn <- merge(headTransform_notNA_inn, countINN_byFioRegion[, .(fio, region_comp, rate_comp)],
- by = c("fio", "region_comp"), all.x = T, all.y = F)
- headTransform_notNA_inn[, count_comp_all := round(count_comp + rate_comp)]
- # . - .
- medianCountComp_byFioRegion <- headTransform_notNA_inn[, .(medianCountComp = round(median(count_comp_all))), by = .(fio, region_comp)]
- headTransform_NA_inn <- merge(headTransform_NA_inn, medianCountComp_byFioRegion, by = c("fio", "region_comp"), all.x = T, all.y = F)
- headTransform_NA_inn[, count_comp := as.numeric(count_comp)] #
- headTransform_NA_inn[is.na(medianCountComp), medianCountComp := count_comp]
- setnames(headTransform_NA_inn, old = "medianCountComp", new = "count_comp_all")
- # . -
- setnames(headNotTransformTable, old = "count_comp", new = "count_comp_all")
- allCompanies <- rbindlist(list(headNotTransformTable[, .(fio, inn_head, region_comp, count_comp_all)]
- ,headTransform_NA_inn[, .(fio, inn_head, region_comp, count_comp_all)]
- ,headTransform_notNA_inn[, .(fio, inn_head, region_comp, count_comp_all)])
- ,use.names = T, fill = F)
- allCompanies[is.na(inn_head), inn_head := '-1']
- allCompanies <- allCompanies[, .(countComp_byFioINN = sum(count_comp_all)), by = .(fio, inn_head)]
- #
- headTableAll[is.na(inn_head), inn_head := '-1']
- headTableAll <- merge(headTableAll, allCompanies, by = c("fio", "inn_head"), all.x = T, all.y = F)
- headTableAll[is.na(countComp_byFioINN), countComp_byFioINN := 0]
- massFounderT <- headTableAll[, .(mass_founder = max(countComp_byFioINN)), ogrn]
- return(massFounderT)
- }
- create_gos_organization <- function() {
- customer_sql = "
- select distinct
- ogrn ogrn
- ,1 is_customer
- FROM [RegistryDicts].[dbo].[ContractsStatistics]
- where FirmType = 1
- "
- customerT <- data.table(sqlQuery(virt155, customer_sql, as.is = 1))
- okfs_sql = "
- select distinct
- ul.ogrn ogrn
- ,okfs.okfs okfs
- from Egrul.dbo.UL_First as ul
- left join [EGRPO].[dbo].[EgrpoTable] as et on ul.ogrnID=et.ogrn
- left join [EGRPO].[dbo].[RefOkfsTable] as okfs on et.okfsID=okfs.ID
- inner join (
- SELECT ogrn, min(IDOKPO) as id
- FROM [EGRPO].[dbo].[EgrpoTable]
- where is_first=1
- and filial=0
- group by ogrn
- ) et_id on et.IDOKPO=et_id.id --- ,
- where okfs.okfs in (11,12,13,14,40,41,42,43)
- "
- okfsT <- data.table(sqlQuery(virt155, okfs_sql, as.is = 1))
- uchrGos_sql <- "
- select
- ul.ogrn ogrn
- ,COUNT(distinct gos_uchr.id) gos_uchr
- from Egrul.dbo.UL_First as ul
- join [Egrul].dbo.UCHR_RFSUBMO gos_uchr with(nolock) on ul.idul=gos_uchr.idul
- group by ul.ogrn
- "
- uchrGos <- data.table(sqlQuery(virt155, uchrGos_sql, as.is = 1))
- statusOrg_sql = "
- select
- ogrn ogrn
- ,inn inn_gos_org
- ,firmStatus.Name status_gos_org
- from Egrul.dbo.UL_First as ul
- join [RegistryDicts].[dbo].[RefEgrulStatus2FirmStatusTable] as classif on classif.RefEgrulStatusID = ul.STATUS_ID
- join [RegistryDicts].[dbo].[RefFirmStatusTable] as firmStatus on firmStatus.ID = classif.RefFirmStatusID
- where firmStatus.ID in (1,2,6,7,8,11)
- "
- statusOrg <- data.table(sqlQuery(virt155, statusOrg_sql, as.is = c(1,2,3)))
- gosOrg = merge(customerT, okfsT, by = "ogrn", all.x = T, all.y = T)
- gosOrg = merge(gosOrg, uchrGos, by = "ogrn", all.x = T, all.y = T)
- gosOrg[, isGosOrg := 1]
- gosOrg = merge(gosOrg, statusOrg, by = "ogrn", all.x = F, all.y = F)
- return(gosOrg)
- }
- calcul.index <- function(dataset_comp) {
- load("D:/kosolapov.s/OneDayFirm/models/randomForest_oneDay_old.RData")
- load("D:/kosolapov.s/OneDayFirm/models/randomForest_oneDay_new.RData")
- dataset_comp$prob_old <- predict(randomForest_oneDay_old, newdata = dataset_comp, type="response")
- dataset_comp$prob_new <- predict(randomForest_oneDay_new, newdata = dataset_comp, type="response")
- dataset_comp[, prob:= (1/3)*prob_old + (2/3)*prob_new]
- dataset_comp[, index:= round(prob*100)]
- dataset_comp[, "prob" := NULL]
- dataset_comp[, index_res := index]
- dataset_comp[ branch > 0, index_res := 80 + 2*(index_res/10)]
- dataset_comp[ agency > 0, index_res := 80 + 2*(index_res/10)]
- dataset_comp[ guaranContrCnt > 0, index_res := 80 + 2*(index_res/10)]
- dataset_comp[ (is_importer + is_exporter) > 0, index_res := 60 + 4*(index_res/10)]
- dataset_comp[ domain > 0, index_res := 40 + 6*(index_res/10)]
- dataset_comp[ CntTradeMarks > 0, index_res := 40 + 6*(index_res/10)]
- dataset_comp[ opf == ' ', index_res := 60 + 4*(index_res/10)]
- dataset_comp[ opf == ' ', index_res := 50 + 5*(index_res/10)]
- dataset_comp[ is_in_rnp_current > 0, index_res := index_res/2]
- dataset_comp[ is_in_rnp_current == 0 & is_in_rnp > 0 , index_res := index_res/1.5]
- dataset_comp[ is_disqualifiedPerson > 0 , index_res := index_res/2]
- ## .
- gosOrg <- create_gos_organization()
- dataset_comp <- merge(dataset_comp, gosOrg, by = "ogrn", all.x = T, all.y = T)
- dataset_comp[is.na(isGosOrg), isGosOrg := 0]
- dataset_comp[isGosOrg == 1, index_res := 100 ]
- dataset_comp[is.na(inn), inn := inn_gos_org]
- ##
- dataset_comp[is.na(status), status := status_gos_org]
- dataset_comp[, c("inn_gos_org", "status_gos_org") := NULL]
- dataset_comp[ status == ' ', index_res := index_res*(3/4)]
- dataset_comp[ status == ' ', index_res := index_res/2]
- ##################################################
- ### ###
- ##################################################
- dataset_comp$index_res[dataset_comp$ogrn == '1137746216728' & dataset_comp$index_res < 83] <- 83
- dataset_comp$index_res[dataset_comp$ogrn == '1082703005219' & dataset_comp$index_res < 96] <- 96
- dataset_comp$index_res[dataset_comp$ogrn == '1127847581058' & dataset_comp$index_res < 85] <- 85
- ogrn_our <- c('5117746056820','5137746238823','5137746234456','1140280000640','5137746238812','1077763929737','5137746236964','5117746061054',
- '1127746077227','1035205407181','1105260001241','1105260001175','1077799012928','1110200000481','1075262014552','1127746077216',
- '1127746077249','1167746413999', '1177746957960')
- dataset_comp[ ogrn %in% ogrn_our, index_res := 75 + 1.5*(index_res/10)]
- #################################################
- dataset_comp[, index_res := round(index_res)]
- return(dataset_comp)
- }
- create_bankruptcy <- function() {
- bankruptcy_sql <- "
- select distinct
- ogrn ogrn
- ,1 is_bankruptcy
- from [Bankruptcy].[dbo].[MessageTable]
- where ogrn is not null
- "
- bankruptcyT <- data.table(sqlQuery(virt155, bankruptcy_sql, as.is = 1))
- return(bankruptcyT)
- }
- changeBase <- function(indexT) {
- indexBaseT <- sqlQuery(virt304, "
- select
- ogrn
- ,index_reliability
- ,is_active
- from Egrul.dbo.indexReliability
- ", as.is = 1)
- indexT <- merge(indexT, indexBaseT, by = "ogrn", all = T)
- insertT <- indexT[is.na(index_reliability),]
- deleteT <- indexT[is_active == 1 & is.na(index), ]
- changeT <- indexT[(index != index_reliability) | (is_active == 0 & !is.na(index)) , ]
- if(nrow(insertT) > 0) {
- insertT[, sql_insert := paste("insert into Egrul.dbo.indexReliability values (", ogrn, ",", index, ", NULL, 1,", ogrn, ", GETDATE())", sep = "")]
- for (i in 1:nrow(insertT)) {
- sqlQuery(virt304, insertT$sql_insert[i])
- }
- }
- if(nrow(deleteT) > 0) {
- deleteT[, sql_delete := paste("update Egrul.dbo.indexReliability set changeDate = GETDATE(), is_active = 0 where ogrnID = ", ogrn, sep = " ")]
- for (i in 1:nrow(deleteT)) {
- sqlQuery(virt304, deleteT$sql_delete[i])
- }
- }
- if(nrow(changeT) > 0) {
- changeT[, sql_update := paste("update Egrul.dbo.indexReliability set changeDate = GETDATE(), is_active = 1, index_reliability =",
- index, "where ogrnID =", ogrn, sep = " ")]
- for (i in 1:nrow(changeT)) {
- sqlQuery(virt304, changeT$sql_update[i])
- }
- }
- }
- main <- function() {
- data <- calcul.dataset()
- MassHead <- MassHeadFunction()
- MassFounder <- MassFounderFunction()
- # bankruptcyT <- create_bankruptcy()
- # data <- data[!(ogrn %in% bankruptcyT$ogrn), ]
- data <- merge(data, MassHead, by.x = "ogrn", by.y = "ogrn", all.x = T, all.y = F)
- data <- merge(data, MassFounder, by.x = "ogrn", by.y = "ogrn", all.xч Б- = T, all.y = F)
- data$mass_head[is.na(data$mass_head)] <- 4.5
- data$mass_founder[is.na(data$mass_founder)] <- 0
- write.csv(data, row.names = FALSE, "probability_extra_data.csv")
- return(data)
- }
- ##############################################################################
- data <- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement