Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def cohort_sniff_fraud(city_id, ride_type, start_date, end_date):
- '''
- city_id: The ID of the city
- ride_type: The type of the driver
- start_date: The beginning date of quest e.g. 2018-02-01
- end_date: The ending date of quest e.g. 2018-02-02
- Returns the SQL query for cohort based quests as a string
- '''
- query='''
- SELECT
- RideType,
- city_id,
- driver_id, name,
- Completed, UniqueUsers,
- Delta, DurationFraud, DistanceFraud, BackToBackPickupFraud, PickupTimeFraud, samephonefraud, HighPromoRideCount,rides_given_to_other_drivers, HighUFHighPromoRideCount,
- CEIL(100*Delta/Completed) as deltascore, CEIL(100*DurationFraud/Completed) as DurationScore, CEIL(100*DistanceFraud/Completed) as DistanceScore,
- CEIL(100*BackToBackPickupFraud/Completed) as backtobackpickupScore,
- FLOOR(100*PickupTimeFraud/Completed) as pickupTimeScore,
- CEIL(100*samephonefraud/Completed) as samephonescore,
- -- FLOOR(100*HighPromoRideCount/Completed) as HighpromoScore,
- FLOOR(100*rides_given_to_other_drivers/Completed) as rideTootherDriversScore
- ,FLOOR(100*HighUFHighPromoRideCount/Completed) as HighUFHighPromoScore
- FROM
- (SELECT
- RideType,
- city_id,
- driver_id,
- name,
- number, account_number, account_type,is_suspended,
- completed_rides as Completed,
- unique_riders_per_driver as UniqueUsers,
- ride_to_customer_delta as Delta,
- duration_based_fraud_count as DurationFraud,
- distance_based_fraud_count as DistanceFraud,
- pickup_fraud_count as BackToBackPickupFraud,
- pickuptime_fraud_count as PickupTimeFraud,
- samephonefraud,
- highfarefraud,
- HighPromoRideCount
- ,HighUFHighPromoRideCount
- FROM
- (
- select driver_id, name, number, account_number, account_type, city_id, is_suspended, RideType
- from
- (SELECT id as driver_id,
- name,
- number
- from drivers
- ) a1
- join
- (select user_id as driver_id, account_number, account_type,
- city_id, is_suspended,
- driver_type as RideType
- FROM drivers_meta) a2 using(driver_id)
- ) A
- ) A
- JOIN
- (
- SELECT driver_id,
- completed_rides,
- unique_riders_per_driver,
- (completed_rides - unique_riders_per_driver) as ride_to_customer_delta,
- duration_based_fraud_count,
- distance_based_fraud_count,
- pickup_fraud_count,
- pickuptime_fraud_count,
- samephonefraud,
- highfarefraud,
- HighPromoRideCount,
- HighUFHighPromoRideCount,
- (duration_based_fraud_count/completed_rides)*100 as duration_fraud_percent,
- (distance_based_fraud_count/completed_rides)*100 as distance_fraud_percent
- FROM
- ( SELECT driver_id,
- count(STATUS) as completed_rides,
- count(DISTINCT rider_id) as unique_riders_per_driver,
- SUM(CASE WHEN (( (
- (((ACOS( (SIN(radians(estimated_pickup_latitude)) * SIN(radians(estimated_dropoff_latitude))) + (COS(radians(estimated_pickup_latitude)) * COS(radians(estimated_dropoff_latitude)) * COS(radians(estimated_dropoff_longitude)-radians(estimated_pickup_longitude))) )) * 6371))
- -
- (((ACOS( (SIN(radians(pickup_latitude)) * SIN(radians(dropoff_latitude))) + (COS(radians(pickup_latitude)) * COS(radians(dropoff_latitude)) * COS(radians(dropoff_longitude)-radians(pickup_longitude))) )) * 6371))
- )
- /
- (((ACOS( (SIN(radians(estimated_pickup_latitude)) * SIN(radians(estimated_dropoff_latitude))) + (COS(radians(estimated_pickup_latitude)) * COS(radians(estimated_dropoff_latitude)) * COS(radians(estimated_dropoff_longitude)-radians(estimated_pickup_longitude))) )) * 6371))
- ) >= 0.8)
- OR
- (
- (((ACOS( (SIN(radians(pickup_latitude)) * SIN(radians(dropoff_latitude))) + (COS(radians(pickup_latitude)) * COS(radians(dropoff_latitude)) * COS(radians(dropoff_longitude)-radians(pickup_longitude))) )) * 6371)) <= 0.6
- )
- THEN 1 ELSE 0 END) as distance_based_fraud_count,
- SUM(CASE WHEN ( (ended_at - started_at)/60 <= 6) OR (((estimated_ride_duration - (ended_at- started_at))/estimated_ride_duration) >= 0.8) THEN 1 ELSE 0 END) as duration_based_fraud_count,
- SUM(fraud_flag) as pickup_fraud_count,
- SUM(CASE WHEN time_to_pickup < 60 THEN 1 ELSE 0 END) AS pickupTIME_fraud_count,
- SUM(CASE WHEN DriverNumber = RiderNumber THEN 1 ELSE 0 END) AS samephonefraud,
- SUM(CASE WHEN undiscounted_fare > 400 THEN 1 ELSE 0 END) AS highfarefraud,
- SUM(CASE WHEN (undiscounted_fare - fare)/undiscounted_fare > .7 THEN 1 ELSE 0 END) HighPromoRideCount,
- SUM(CASE WHEN (undiscounted_fare - fare)/undiscounted_fare > .7 AND undiscounted_fare > 100 THEN 1 ELSE 0 END) HighUFHighPromoRideCount
- from (
- select id,
- created_at as DateTime, date(Created_at) as aDate,
- status, dropoff_latitude, dropoff_longitude, pickup_latitude, pickup_longitude, estimated_pickup_latitude, estimated_pickup_longitude, estimated_dropoff_latitude, estimated_dropoff_longitude, estimated_ride_duration, ended_at, started_at, DriverNumber, RiderNumber, undiscounted_fare,
- rider_id,
- fare,
- pickup_zone_id,
- dropoff_zone_id,
- time_to_pickup,
- IF(@prev_id!=a.driver_id, @rownum:=1, @rownum:=@rownum+1) AS row_number,
- IF((ACOS( (SIN(radians(pickup_latitude)) * SIN(radians(@prev_pickup_lat))) + (COS(radians(pickup_latitude)) * COS(radians(@prev_pickup_lat)) * COS(radians(@prev_pickup_lon)-radians(pickup_longitude))) )) * 6371 <= 0.5
- and (TIMESTAMPDIFF(MINUTE, @Prev_time, a.created_at))<30 and @prev_id = driver_id,1,0) AS fraud_flag,
- @prev_id as PrevDriverID,
- @prev_id:=a.driver_id AS driver_id,
- TIMESTAMPDIFF(MINUTE, @Prev_time, a.created_at) AS time_diff,
- @Prev_time as Prev_time,
- @Prev_time := a.CREATED_AT AS created_at,
- @prev_pickup_lat := pickup_latitude AS cur_lat,
- @prev_pickup_lon := pickup_longitude AS cur_lon
- from ((rides a,
- (SELECT @rownum:=0, @prev_id:=NULL, @Prev_time:=NULL, @prev_pickup_lat := NULL, @prev_pickup_lon := NULL) b)
- join
- (select id as rider_id, number as RiderNumber
- from users) bc
- using (rider_id)
- join
- (select id as driver_id, number as DriverNumber
- from drivers) cc
- using (driver_id))
- where status = 'COMPLETED'
- and date(created_at) >= '{}'
- and date(created_at) <= '{}'
- -- AND hour(created_at) >= 7
- order by driver_id,created_at
- ) cd
- GROUP BY driver_id) C
- ) B
- USING(driver_id)
- ) E
- LEFT JOIN
- (
- SELECT driver_id, count(status) as rides_given_to_other_drivers
- FROM
- (SELECT driver_id, rider_id, date(created_at) as aDate, status FROM rides WHERE status = 'COMPLETED' AND date(created_at) between '{}' and '{}'
- -- AND hour(created_at) >= 7
- ) A
- JOIN
- (
- SELECT rider_id, aDate
- FROM
- (select id as driver_id, number from drivers where number <> ' ' ) a
- join
- (select id as rider_id, number from users where type = 'user' and number <> ' ') b
- using(number)
- join
- (select driver_id, date(created_at) as aDate from rides where status = 'completed' GROUP BY driver_id, date(created_at) ) c
- using(driver_id)
- ) B
- using(rider_id,aDate)
- GROUP BY driver_id
- ) F
- USING (driver_id)
- '''.format(start_date, end_date, start_date, end_date, start_date, end_date)
- return query
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement