# Untitled



###### Read Library #######

import rakutenRMS_setup             # Importing the setup file

import urllib.request
import time
import requests
import lxml
import csv
from datetime import datetime, timedelta

import time
import random

from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common import action_chains, keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# ---------------------------------------------------------------------------
# Run configuration: every value below is read from rakutenRMS_setup.py.
# ---------------------------------------------------------------------------

# Argument handed to webdriver.Chrome() and the address of the login page.
location_Chrome, url_default = (
    rakutenRMS_setup.location_Chrome,
    rakutenRMS_setup.url_default,
)

# Credentials typed into the first login form (RMS login ID / password).
rLoginID, rLoginPassword = (
    rakutenRMS_setup.rLoginID,
    rakutenRMS_setup.rLoginPassword,
)

# Credentials typed into the second login form (user ID / password).
username, pass_word = rakutenRMS_setup.username, rakutenRMS_setup.pass_word

# Device type to track (used as an element id and as the "menu" URL parameter)
device_nav_code = rakutenRMS_setup.device_nav_code

# User action to track (matched against the option text of the dropdown)
users_dropdown_option = rakutenRMS_setup.users_dropdown_option

# Output file naming rules
#   dateBlock + "rakutenRMS_data.csv"                  -> user IDs
#   "Tracking_" + dateBlock + "rakutenRMS_data.csv"    -> tracking info
#   "User + Tracking data_" + dateBlock + ".csv"       -> final output after merging the 2 CSVs

# File names carry the previous day's date, formatted as YYYYMMDD.
yesterday = datetime.now() + timedelta(days=-1)
dateBlock = format(yesterday, '%Y%m%d')


# HTTP session used for the form POST made before the browser is started.
session = requests.Session()

# Form fields sent with that POST.
login_info = dict(
    loginID=rLoginID,
    loginPassword=rLoginPassword,
    user_name=username,
    password=pass_word,
)


def create_session():
    """Build a new ``requests.Session`` with browser-like default headers.

    The Accept, Accept-Language and User-Agent headers are merged into the
    session's own header set before it is returned.

    Returns:
        The freshly created, pre-configured session.
    """
    browser_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "ja,en-US;q=0.7,en;q=0.3",
        "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0",
    }
    new_session = requests.Session()
    new_session.headers.update(browser_headers)
    return new_session


# ---------------------------------------------------------------------------
# Log in (HTTP POST + browser) and navigate to the tracking report.
#
# Flow: POST the credentials with `requests`, start Chrome, complete the
# two-step browser login, click through the follow-up pages, open the
# access-analysis tool, choose the device and the target action, and submit
# the report form.
#
# Elements are located with driver.find_element(By.<KIND>, value).  The
# find_element_by_* shortcuts used previously are deprecated in newer Selenium
# releases, whereas the By-based call is accepted by old and new releases.
# ---------------------------------------------------------------------------

# url_login = url_default + "/user/login_exec/"  # Example
url_login = url_default

# Send the credentials as a form POST; raise_for_status() aborts the script on
# a 4xx/5xx answer.
# NOTE(review): neither `res` nor the cookies held by `session` are used again
# in the visible script (the browser logs in on its own) - confirm that this
# request is still required.
res = session.post(url_login, data=login_info)
res.raise_for_status()

# NOTE(review): `headers` is not referenced anywhere else in the visible script.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "ja,en-US;q=0.7,en;q=0.3",
    "Connection": "keep-alive",
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0"
}

driver = webdriver.Chrome(location_Chrome)
action = action_chains.ActionChains(driver)


# Get the cookies first by loading the login page
driver.get(url_login)

# maximize the window
driver.maximize_window()

# Step 1 of the login : Login using rms user ID and rms password
driver.find_element(By.NAME, "login_id").send_keys(rLoginID)
time.sleep(1)

driver.find_element(By.NAME, "passwd").send_keys(rLoginPassword)
time.sleep(1)

# Click the first element whose text contains the "Next" label
driver.find_element(By.XPATH, ".//*[contains(text(), '次へ')]").click()
time.sleep(2)

print("############################## Step 1: Successful ############################################")


# driver.save_screenshot(_dir + "/login.png")   # lets you confirm the logged-in state from an image

# Step 2 of the login: Login with user ID (email) and password
driver.find_element(By.NAME, "user_id").send_keys(username)
time.sleep(1)
driver.find_element(By.NAME, "user_passwd").send_keys(pass_word)
time.sleep(1)


# The login button is reached with the keyboard instead of being clicked;
# presumably because a direct click hit an "element not visible" error, see
# https://stackoverflow.com/questions/27927964/selenium-element-not-visible-exception
#
# Original note: hitting tab 5 times to reach the desired element and then
# hitting enter.  This code block also works with range(5) and the F12 key
# -> confirm why.
#
# NOTE(review): the same ActionChains object is reused and perform() is called
# after every send_keys().  Unless reset_actions() is called, the queued key
# presses are presumably kept between perform() calls (Selenium 3 behaviour -
# confirm for the installed version), so more than six TABs may actually be
# sent.  The sequence is left exactly as it was because it is the one known to
# work against the live page.
for _ in range(6):
    action.send_keys(keys.Keys.TAB)
    action.perform()

action.send_keys(keys.Keys.RETURN)
action.perform()

time.sleep(2)


# Click the "Next" button on the page shown after the login
driver.find_element(By.XPATH, '//*[@class="rf-button-primary rf-block rf-medium"]').click()
time.sleep(2)


# Click the accept compliance policy button
driver.find_element(By.XPATH, '//*[@class="btn-reset btn-round btn-red"]').click()
time.sleep(1)


# Click on "Data analysis" (first element with the rms-nav-txt class)
driver.find_element(By.XPATH, '//*[@class="rms-nav-txt"]').click()
time.sleep(2)

print("Click on データ分析　successful")


# Instead of clicking "Access analysis", navigate directly to the page it leads to
driver.get("https://mainmenu.rms.rakuten.co.jp/?left_navi=32")


print("Click on アクセス分析　successful")
time.sleep(2)


# Click on the "Search keywords" button
driver.find_element(By.ID, "mm_sub0303_05").click()
time.sleep(2)

print("click on the 検索キーワード　button : Successful")

# Ideas considered for reaching the tracking page:
#   1: try tab 16(?) times and hit enter OR hit enter, tab 15 times and then hit enter again
#   2: directly navigate to the page by using the hyperlink (the one used below)
driver.get("https://rdatatool.rms.rakuten.co.jp/access/?menu=pc&evt=RT_P11_02&stat=1&owin=")
print("Click on トラキング　link: Successful")


# Pick the device type; device_nav_code is used as the element id
driver.find_element(By.ID, device_nav_code).click()
print("Click on radio button: Successful")


# Select a value from the "target action" dropdown by its visible text
dropdown_target_action = driver.find_element(By.NAME, "limit")
for option in dropdown_target_action.find_elements(By.TAG_NAME, 'option'):
    if option.text == users_dropdown_option:      # value comes from rakutenRMS_setup.py
        option.click()  # select() in earlier versions of webdriver
        break
# NOTE(review): this message is printed even when no option text matched.
print("Selection of the value from the 対象の動作 dropdown: Successful")


# Submit the form that contains the "select_day" element.
# Try submit. If not successful then use click
driver.find_element(By.NAME, "select_day").submit()
print("Click on データ表示: Successful")

# ---------------------------------------------------------------------------
# Phase 1: walk the paginated user list.
#
# For every page, the table at index 40 is read row by row.  Rows that contain
# a tracking link are written to <dateBlock>rakutenRMS_data.csv and the link is
# remembered in `tracking_urls` for the tracking phase that follows.
#
# Page layout notes kept from the original author:
#   Total number of tables in the page: 49
#   Tables with the pager links: 38, 41
#   Table with the user hyperlinks, access count, user action: 40
# ---------------------------------------------------------------------------

flag_continue = 1

# Incremented at the top of each loop pass, so while a page is being parsed it
# already holds the number of the page to load next.
all_users_page_index = 1
# user_infopage_index = 1
tracking_page_index = 1         # only incremented by the tracking loop further down

# NOTE(review): these three names are not referenced again in the visible script.
user = ""
access_count = ""
user_action = ""

# Tracking-detail URLs exactly as they appear in the rendered <a> tag
# (so "&" is still written as "&amp;").
tracking_urls = []

# The `with` block guarantees the CSV file is closed even if scraping fails.
with open(dateBlock + "rakutenRMS_data.csv", 'w', newline='', encoding='shift_jis') as csvFile:
    writer = csv.writer(csvFile)

    while flag_continue == 1:
        all_users_page_index += 1
        obj_bs = BeautifulSoup(driver.page_source, "lxml")

        try:
            table = obj_bs.findAll("table")[40]
        except IndexError:
            # Fewer than 41 tables on the page: treated as "no more user pages".
            # NOTE(review): this (or a failing driver.get below) is the only
            # way out of the loop; confirm that a page past the last one
            # really renders fewer tables.
            print("\n ------- Inside the first except block -------- \n")
            flag_continue = 0
            break

        rows = table.findAll("tr")

        for row in rows:
            flag_data_user = 0
            list_user_info = row.findAll(['td'])
            csvRow = []

            if not list_user_info:
                # Row without <td> cells: nothing to extract.
                continue

            # Getting hashed user IDs: any cell text longer than 32 characters
            # contributes its first 32 characters as a leading column.
            for cell in list_user_info:
                hashed_user_id = str(cell.get_text())
                if len(hashed_user_id) > 32:
                    csvRow.append(hashed_user_id[:32])

            # One further column per cell: the tracking URL when the cell
            # holds a link, otherwise the cell text.
            for cell in list_user_info:
                user_tracking_link = str(cell.find("a"))
                locate_start = user_tracking_link.find('a href="')
                locate_end = user_tracking_link.find('" target="')

                if locate_start != -1 and locate_end != -1:
                    # 8 == len('a href="'): keep only the URL between the quotes.
                    text_trim = user_tracking_link[locate_start + 8: locate_end]
                    tracking_urls.append(text_trim)
                    flag_data_user = 1
                else:
                    font_tag_info = str(cell.find("font"))
                    color_start = font_tag_info.find("color=")

                    # Skip 'color="' (7 characters) and take the 7-character colour code.
                    if color_start != -1:
                        color_code = font_tag_info[color_start + 7:color_start + 7 + 7]
                    else:
                        color_code = ""

                    # Drop characters that cannot be represented in shift_jis,
                    # the encoding of the output file.
                    text_def = str(cell.get_text())
                    text_trim = text_def.encode("shift_jis", errors="ignore").decode("shift_jis", "ignore")

                    # Cells rendered in #CC0000 are flagged with a leading "*".
                    if color_code == "#CC0000":
                        text_trim = "*" + text_trim

                csvRow.append(text_trim)

            # Only rows in which at least one tracking link was found are saved.
            if flag_data_user == 1:
                writer.writerow(csvRow)

        try:
            # Navigate to the next page of the user list.  Example URLs:
            # https://rdatatool.rms.rakuten.co.jp/access/?menu=pc&evt=RT_P11_02&page=2&owin=    # page 2
            # https://rdatatool.rms.rakuten.co.jp/access/?menu=pc&evt=RT_P11_02&page=3&owin=    # page 3
            all_users_link_to_check = "https://rdatatool.rms.rakuten.co.jp/access/?menu=" + device_nav_code + "&evt=RT_P11_02&page=" + str(all_users_page_index) + "&owin="
            driver.get(all_users_link_to_check)
            print("Now checking: " + all_users_link_to_check)
            time.sleep(random.randint(1, 3))

        except Exception:
            # Deliberately broad: any navigation failure ends the user-list phase.
            flag_continue = 0
            print(" \n ----- Inside the second except block -------- \n ")
            break

    print("############### CSV File with user links created ############### ")
    # input("############ Enter a key to scrape user tracking details ############ ")


# ---------------------------------------------------------------------------
# Phase 2: per-user tracking details.
#
# Every URL collected in `tracking_urls` is opened, the table at index 17 is
# read (its first <tr> wraps the inner rows that hold the data) and each inner
# row is appended to Tracking_<dateBlock>rakutenRMS_data.csv.  The element with
# id "page_next1" is clicked until it can no longer be used.
#
# Fixes compared with the previous version:
#   * the page is re-parsed on every pagination step; it used to be parsed
#     once per user, so each click on "next" wrote the first page again and
#     later pages were never saved;
#   * rows without <td> cells are skipped (the bare `next` was a no-op, so
#     they ended up as blank CSV lines);
#   * the CSV file and the browser are released even when an error occurs;
#   * a queued-but-never-performed F5 key press was removed.
# ---------------------------------------------------------------------------

try:
    # NOTE(review): `main_window` is not used anywhere in the visible script.
    main_window = driver.current_window_handle

    with open("Tracking_" + dateBlock + "rakutenRMS_data.csv", 'w', newline='', encoding='shift_jis') as csvFile_tracking:
        writer2 = csv.writer(csvFile_tracking)

        for index, raw_url in enumerate(tracking_urls):
            # for index in range(3):          # Checking only the first 3 users for testing
            flag_continue_tracking = 1
            tracking_page_index += 1        # NOTE(review): counter is never read

            # The stored links are HTML-escaped; restore plain "&" separators.
            url_to_click = raw_url.replace("&amp;", "&")
            driver.get(url_to_click)
            print("[User:" + str(index + 1) + "] Now opening this url (Tracking): " + url_to_click)

            while flag_continue_tracking == 1:
                # Work on the most recently opened window and parse what it
                # shows *now*, so every page reached via "next" is captured.
                driver.switch_to.window(driver.window_handles[-1])
                obj_bs2 = BeautifulSoup(driver.page_source, "lxml")

                try:
                    # The table at index 17 holds the main tracking details;
                    # the inner table is contained in its first <tr>.
                    table = obj_bs2.findAll("table")[17]
                    outer_tr = table.findAll("tr")[0]
                except IndexError:
                    print("\n\n\n Inside the first except block : Tracking\n")
                    print("Table with user info not found")
                    print("-------------------------------------------------")
                    flag_continue_tracking = 0
                    break

                for row in outer_tr.findAll("tr"):
                    list_user_tracking_info = row.findAll(['td'])

                    if not list_user_tracking_info:
                        print("-----------------> I am here <---------------------")     # Confirm if code flows here
                        continue

                    # Keep only characters that shift_jis (the file encoding)
                    # can represent.
                    csvRow2 = [
                        str(cell.get_text()).encode("shift_jis", errors="ignore").decode("shift_jis", "ignore")
                        for cell in list_user_tracking_info
                    ]
                    writer2.writerow(csvRow2)

                try:
                    # Click the "next 30 entries" link to move to the next page.
                    driver.find_element(By.ID, "page_next1").click()

                    # Two random pauses (as before) give the next page time to
                    # load before it is parsed at the top of the loop.
                    time.sleep(random.randint(1, 3))
                    time.sleep(random.randint(1, 3))

                except Exception:
                    # Deliberately broad: a missing or unusable "next" link is
                    # the normal end of this user's pages.
                    flag_continue_tracking = 0
                    print("\n ----- Inside the second except block : Tracking -------- \n")
                    time.sleep(random.randint(1, 3))
                    break

        print("\n\n================== All rows printed and saved to the csv ======================\n\n")

finally:
    # Always release the browser, also when scraping stopped with an error.
    driver.close()
    driver.quit()