Advertisement
Guest User

inelegant

a guest
Jul 1st, 2018
2,768
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 28.02 KB | None | 0 0
  1. """
  2.  
  3. A script to determine how many questions in a given annotation file received "Off-Topic" responses.
  4.  
  5. Things this script does:
  6. - Examines all responses in file and calculates total of responses that are "Off-Topic".
  7.  
  8. Output:
  9. - An excel file containing all questions that triggered Off-Topic responses
  10. - Totals: Number of questions, number of questions with on-topic answer, number of questions with off-topic answer
  11.  
  12. Params:
  13.  
  14. """
  15.  
  16. import re
  17. import os
  18. import openpyxl
  19. from openpyxl import Workbook
  20.  
  21. def assessCoverageGaps(fileName, whichPath):
  22.  
  23.     # For each annotated transcript, add all unique response ID's to dict as key, with type annotation as value
  24.     # For each link annotation file, compare responses to dict and compile totals of how many were "off topics"
  25.     # Need to create a list of all types deemed "Off-Topic"
  26.     # NEED TO CONSIDER ONLY QUESTIONS THAT HAVE BEEN LINK ANNOTATED OR RATED AS 4, CHECK LINK ANNOTATION TYPES TOO!!!! <------------
  27.  
  28.    
  29.     # Read all file names from annotation folder and store only annotated transcripts to list
  30.     annotationRE = re.compile(r'Annotated')
  31.     #fullDirectoryList = os.listdir("C:\\Users\\CENSORED\\DeskEtop\\svn_ndt\\testing\\Annotations\\type-annotations")
  32.     fullDirectoryList = os.listdir("C:\\Users\\mello\\Desktop\\testing\\Annotations\\type-annotations")
  33.     annotatedTranscriptList = []
  34.     for name in fullDirectoryList:
  35.         if annotationRE.search(name) and not name == "Annotated - Eva-Kor-Full-Transcript-Jill.xlsx":
  36.             annotatedTranscriptList.append(name)
  37.  
  38.     print(annotatedTranscriptList)
  39.  
  40.     # A Dict to hold all response IDs and their corresponding types
  41.     typeDict = {}
  42.  
  43.     # Read each file in one by one, find "ID" column, then iter through all cells in that column, adding unique IDs to Dict with type as value
  44.     for transcript in annotatedTranscriptList:
  45.         #annotatedTranscript = openpyxl.load_workbook("C:\\Users\\CENSORED\\Desktop\\svn_ndt\\testing\\Annotations\\type-annotations\\"+transcript)
  46.         annotatedTranscript = openpyxl.load_workbook("C:\\Users\\mello\\Desktop\\testing\\Annotations\\type-annotations\\"+transcript)
  47.         annotatedSheet = annotatedTranscript.active
  48.  
  49.  
  50.         # Find column letter for ID
  51.         IDRE = re.compile(r'id')
  52.         IDColumn = "ZZ"
  53.  
  54.         for col in annotatedSheet.iter_cols(min_row = 1, max_col = annotatedSheet.max_column, max_row = 1):
  55.             for cell in col:
  56.                 if cell.value:
  57.                     if IDRE.search((cell.value).lower()):
  58.                         IDColumn = cell.column
  59.  
  60.  
  61.         # Find column letter for type
  62.         typeRE = re.compile(r'type')
  63.         typeColumn = ""
  64.  
  65.         for col in annotatedSheet.iter_cols(min_row = 1, max_col = annotatedSheet.max_column, max_row = 1):
  66.             for cell in col:
  67.                 if cell.value:
  68.                     if typeRE.search((cell.value).lower()):
  69.                         typeColumn = cell.column
  70.  
  71.  
  72.         # Compile typeDict
  73.         for cellRows in annotatedSheet[IDColumn+"2":IDColumn+str(annotatedSheet.max_row)]:
  74.             for cell in cellRows:
  75.                 if cell.value:
  76.                     if cell.value not in typeDict and annotatedSheet[typeColumn+str(cell.row)].value:
  77.                         #print("yes")
  78.                         typeDict[cell.value] = annotatedSheet[typeColumn+str(cell.row)].value
  79.  
  80.  
  81.     # The file path for the annotation files
  82.     # Work Machine
  83.     """
  84.    if whichPath == "machine":
  85.        newFilePath = "C:\\Users\\CENSORED\\Desktop\\svn_ndt_all\\data-analysis\\alfred-testing-transcripts\\by-machine\\excel-version\\"
  86.    elif whichPath == "survivor":
  87.        newFilePath = "C:\\Users\\CENSORED\\Desktop\\svn_ndt_all\\data-analysis\\alfred-testing-transcripts\\by-survivor\\excel-version\\"
  88.    elif whichPath == "pinchas":
  89.        newFilePath = "C:\\Users\\CENSORED\\Desktop\\svn_ndt_all\\data-analysis\\additional-questions-for-linking"
  90.    else:
  91.        print("Invalid whichPath")
  92.        
  93.    """
  94.     # Home Machine
  95.     if whichPath == "machine":
  96.         newFilePath = "C:\\Users\\mello\\Desktop\\svn_ndt_all\\data-analysis\\alfred-testing-transcripts\\by-machine\\excel-version\\"
  97.     elif whichPath == "survivor":
  98.         newFilePath = "C:\\Users\\mello\\Desktop\\svn_ndt_all\\data-analysis\\alfred-testing-transcripts\\by-survivor\\excel-version\\"
  99.     elif whichPath == "pinchas":
  100.         newFilePath = "C:\\Users\\mello\\Desktop\\svn_ndt_all\\data-analysis\\additional-questions-for-linking"
  101.     else:
  102.         print("Invalid whichPath")
  103.    
  104.  
  105.     # Open link annotation file
  106.     thisFile = openpyxl.load_workbook(newFilePath+fileName+".xlsx")
  107.     thisSheet = thisFile.active
  108.  
  109.     # Assign column letters for each survivor
  110.     pinchasLetter = "ZZ"
  111.     samLetter = "ZZ"
  112.     fritzieLetter = "ZZ"
  113.     aaronLetter = "ZZ"
  114.     izzyLetter = "ZZ"
  115.     adinaLetter = "ZZ"
  116.     janineLetter = "ZZ"
  117.     matusLetter = "ZZ"
  118.     evaKLetter = "ZZ"
  119.     evaSLetter = "ZZ"
  120.     anitaLetter = "ZZ"
  121.     reneeLetter = "ZZ"
  122.    
  123.     for col in thisSheet.iter_cols(min_row = 1, max_col = thisSheet.max_column, max_row = 1):
  124.         for cell in col:
  125.             if cell.value:
  126.                 cellValueLow = (cell.value).lower()
  127.                 cellValueStripped = cellValueLow.strip()
  128.                 if cellValueStripped == "pinchas":
  129.                     pinchasLetter = cell.column
  130.                 elif cellValueStripped == "sam":
  131.                     samLetter = cell.column
  132.                 elif cellValueStripped == "fritzie":
  133.                     fritzieLetter = cell.column
  134.                 elif cellValueStripped == "aaron":
  135.                     aaronLetter = cell.column
  136.                 elif cellValueStripped == "izzy":
  137.                     izzyLetter = cell.column
  138.                 elif cellValueStripped == "adina":
  139.                     adinaLetter = cell.column
  140.                 elif cellValueStripped == "janine":
  141.                     janineLetter = cell.column
  142.                 elif cellValueStripped == "matus":
  143.                     matusLetter = cell.column
  144.                 elif cellValueStripped == "eva k":
  145.                     evaKLetter = cell.column
  146.                 elif cellValueStripped == "eva s":
  147.                     evaSLetter = cell.column
  148.                 elif cellValueStripped == "anita":
  149.                     anitaLetter = cell.column
  150.                 elif cellValueStripped == "renee":
  151.                     reneeLetter = cell.column
  152.  
  153.     print("Pinchas: "+pinchasLetter)
  154.  
  155.  
  156.     # Find column letter for Response ID, asr, user text, On-Topic and Off-Topic
  157.     responseIDRE = re.compile(r'response id')
  158.     responseIDColumn = "ZZ"
  159.     asrColumn = "ZZ"
  160.     asrRE = re.compile(r'asr')
  161.     userTextColumn = "ZZ"
  162.     userTextRE = re.compile(r'user')
  163.     onTopicRE = re.compile(r'on-topic')
  164.     onTopicColumn = "ZZ"
  165.     offTopicRE = re.compile(r'off-topic')
  166.     offTopicColumn = "ZZ"
  167.  
  168.     for col in thisSheet.iter_cols(min_row = 1, max_col = thisSheet.max_column, max_row = 1):
  169.         for cell in col:
  170.             if cell.value:
  171.                 if responseIDRE.search((cell.value).lower()):
  172.                     responseIDColumn = cell.column
  173.                 elif asrRE.search((cell.value).lower()):
  174.                     asrColumn = cell.column
  175.                 elif userTextRE.search((cell.value).lower()):
  176.                     userTextColumn = cell.column
  177.                 elif onTopicRE.search((cell.value).lower()):
  178.                     onTopicColumn = cell.column
  179.                 elif offTopicRE.search((cell.value).lower()):
  180.                     offTopicColumn = cell.column
  181.  
  182.     print("ResponseID: "+responseIDColumn)
  183.     print("ASR: "+asrColumn)
  184.     print("UserText: "+userTextColumn)
  185.     print("On-Topic: "+onTopicColumn)
  186.     print("Off-Topic: "+offTopicColumn)
  187.    
  188.  
  189.  
  190.     # List of all "off-topic" types
  191.     offTopicTypes = ["cant_answer", "cant_answer_reason", "cant_answer_time", "cant_answer_war", "change_topic", "direct_to_resource", "dont_understand", "other_experience",
  192.                      "rephrase", "say_again", "segue_to_related_topic", "seque_to_specific_story", "segue_to_story", "subject_idea", "subject_yes_no", "topic_intro",
  193.                      "topic_request_for_story"]
  194.  
  195.  
  196.     # List of "won't answer" types - ANALYZE SEPERATELY FROM OFF-TOPICS!!!
  197.     wontAnswerTypes = ["wont_answer", "wont_answer_inappropriate", "wont_answer_now", "wont_answer_too_personal"]
  198.  
  199.  
  200.     # Calculate data totals and copy over questions, responses and types for off-topic responses
  201.     totalQuestions = 0
  202.     offTopicQuestions = 0
  203.     onTopicQuestions = 0
  204.     unrated = 0
  205.     wontAnswerQuestions = 0
  206.     IDNotInDict = 0
  207.  
  208.     gapCoverage = Workbook()
  209.     gapCoverageSheet = gapCoverage.active
  210.     gapCoverageSheet["A1"] = "Question"
  211.     gapCoverageSheet["B1"] = "Response"
  212.     gapCoverageSheet["C1"] = "Type"
  213.     gapCoverageSheet["D1"] = "Where?"
  214.     gapCoverageSheet["E1"] = "Total Questions"
  215.     gapCoverageSheet["F1"] = "Off Topic"
  216.     gapCoverageSheet["G1"] = "On Topic"
  217.     gapCoverageSheet["H1"] = "ID Changed"
  218.     gapCoverageSheet["I1"] = "Unrated"
  219.     gapCoverageSheet["J1"] = "Old ID"
  220.     gapCoverageSheet["K1"] = "Invalid Rating"
  221.     gapCoverageSheet["L1"] = "Location"
  222.     gapCoverageCounter = 2
  223.     oldIDCounter = 2
  224.     invalidCounter = 2
  225.  
  226.     # A dict with survivor number as key and survivor letter as value
  227.     letterDict = {}
  228.     letterDict["0"] = pinchasLetter
  229.     letterDict["2"] = anitaLetter
  230.     letterDict["3"] = samLetter
  231.     letterDict["4"] = fritzieLetter
  232.     letterDict["5"] = aaronLetter
  233.     letterDict["6"] = reneeLetter
  234.     letterDict["7"] = evaSLetter
  235.     letterDict["8"] = adinaLetter
  236.     letterDict["9"] = janineLetter
  237.     letterDict["10"] = izzyLetter
  238.     letterDict["11"] = matusLetter
  239.     letterDict["12"] = evaKLetter
  240.  
  241.     # an RE to check for annotation NO_GOOD_RESPONSE
  242.     noRE = re.compile(r'no')
  243.  
  244.     # 1) First check if the question/response pair was rated as a 4 or 3, if it was a 4, only check type of ID in response ID column [DONE]
  245.     # if it was a 3, check if there are alternatives given, if there are check the type of the alternatives, if not, check type of ID in response ID column [DONE]
  246.     # if it was a 1 or 2 check if there are alternatives given, if there are check the type of the alternatives [DONE]
  247.     # also check for "NO_GOOD_RESPONSE" annotations, in these cases, count these as "offTopicQuestions" <--
  248.     # also check for ratings of 5 and 6!!! [DONE]
  249.     # add column to output sheet to indicate if off topic response is from annotation column or response ID column [DONE]
  250.     for cellRows in thisSheet[asrColumn+"2":asrColumn+str(thisSheet.max_row)]:
  251.         for cell in cellRows:
  252.             if cell.value and thisSheet[responseIDColumn+str(cell.row)].value:
  253.                 totalQuestions += 1
  254.                 # store response ID and all associated annotations in variables
  255.                 thisResponseID = thisSheet[responseIDColumn+str(cell.row)].value
  256.                 thisOnTopic = thisSheet[onTopicColumn+str(cell.row)].value
  257.                 thisOffTopic = thisSheet[offTopicColumn+str(cell.row)].value
  258.  
  259.                
  260.                 # If the response is rated as 4
  261.                 if thisOnTopic == 4:
  262.                     # If response ID is in the type Dict
  263.                     if thisResponseID in typeDict:
  264.                         # If this response is off topic, increment counter and copy data to gap coverage sheet
  265.                         if typeDict[thisResponseID] in offTopicTypes:
  266.                             offTopicQuestions += 1
  267.                             # If User Text column has transcription, use this as question, else use value in asr column
  268.                             if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  269.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  270.                             else:
  271.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  272.                             gapCoverageSheet["B"+str(gapCoverageCounter)] = thisResponseID
  273.                             gapCoverageSheet["C"+str(gapCoverageCounter)] = typeDict[thisResponseID]
  274.                             gapCoverageSheet["D"+str(gapCoverageCounter)] = "response"
  275.                             gapCoverageCounter += 1
  276.                        
  277.  
  278.                         # If this response is on topic, increment counter
  279.                         else:
  280.                             onTopicQuestions += 1
  281.  
  282.                     # If response ID not in type Dict, tally number of ID's not in Dict
  283.                     else:
  284.                         IDNotInDict += 1
  285.                         gapCoverageSheet["J"+str(oldIDCounter)] = thisResponseID
  286.                         oldIDCounter += 1
  287.  
  288.                
  289.                 # If the response is rated as 3
  290.                 elif thisOnTopic == 3:
  291.                     # Extract survivor number from response ID
  292.                     pinchasRE = re.compile(r'0')
  293.                     splitResponse = thisResponseID.split("-")
  294.  
  295.                     if pinchasRE.match(splitResponse[0]):
  296.                         survivorNumber = "0"
  297.                         #print("EES")
  298.                     else:
  299.                         survivorNumber = splitResponse[0]
  300.                     # Check if alternative responses are provided in survivor column
  301.                     if thisSheet[(letterDict[survivorNumber])+str(cell.row)].value:
  302.                         # Split cell value into list of individual IDs
  303.                         cellSplit = (thisSheet[(letterDict[survivorNumber])+str(cell.row)].value).split("\n")
  304.                         isOffTopic = "false"
  305.                         noGoodResponse = "false"
  306.                         OTResponseID = "ZZ"
  307.                         #print("YES")
  308.                         # Check each ID to see if it is Off Topic, if at least 1 ID is Off Topic set isOffTopic to true
  309.                         for ID in cellSplit:
  310.                             if ID in typeDict:
  311.                                 if typeDict[ID] in offTopicTypes:
  312.                                     isOffTopic = "true"
  313.                                     OTResponseID = ID
  314.  
  315.                             elif noRE.match(ID.lower()):
  316.                                     noGoodResponse = "true"
  317.                                     noGoodResponseID = ID
  318.                             else:
  319.                                 IDNotInDict += 1
  320.                                 gapCoverageSheet["J"+str(oldIDCounter)] = thisResponseID
  321.                                 oldIDCounter += 1
  322.  
  323.  
  324.                         # If isOffTopic is true, add question to off topic count, else add to on topic count
  325.                         if isOffTopic == "true":
  326.                             offTopicQuestions += 1
  327.                             # If User Text column has transcription, use this as question, else use value in asr column
  328.                             if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  329.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  330.                             else:
  331.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  332.                             gapCoverageSheet["B"+str(gapCoverageCounter)] = OTResponseID
  333.                             gapCoverageSheet["C"+str(gapCoverageCounter)] = typeDict[OTResponseID]
  334.                             gapCoverageSheet["D"+str(gapCoverageCounter)] = "annotation"
  335.                             gapCoverageCounter += 1
  336.  
  337.                         elif noGoodResponse == "true":
  338.                             offTopicQuestions += 1
  339.                             # If User Text column has transcription, use this as question, else use value in asr column
  340.                             if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  341.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  342.                             else:
  343.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  344.                             gapCoverageSheet["B"+str(gapCoverageCounter)] = noGoodResponseID
  345.                             gapCoverageSheet["D"+str(gapCoverageCounter)] = "annotation"
  346.                             gapCoverageCounter += 1
  347.                            
  348.                         else:
  349.                             onTopicQuestions += 1
  350.  
  351.                            
  352.                     # If no alt responses are given for this 3, check the response ID column instead
  353.                     else:
  354.                         # If response ID is in the type Dict
  355.                         if thisResponseID in typeDict:
  356.                             # If this response is off topic, increment counter and copy data to gap coverage sheet
  357.                             if typeDict[thisResponseID] in offTopicTypes:
  358.                                 offTopicQuestions += 1
  359.                                 # If User Text column has transcription, use this as question, else use value in asr column
  360.                                 if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  361.                                     gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  362.                                 else:
  363.                                     gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  364.                                 gapCoverageSheet["B"+str(gapCoverageCounter)] = thisResponseID
  365.                                 gapCoverageSheet["C"+str(gapCoverageCounter)] = typeDict[thisResponseID]
  366.                                 gapCoverageSheet["D"+str(gapCoverageCounter)] = "response"
  367.                                 gapCoverageCounter += 1
  368.                        
  369.  
  370.                             # If this response is on topic, increment counter
  371.                             else:
  372.                                 onTopicQuestions += 1
  373.  
  374.                         # If response ID not in type Dict, tally number of ID's not in Dict
  375.                         else:
  376.                             IDNotInDict += 1
  377.                             gapCoverageSheet["J"+str(oldIDCounter)] = thisResponseID
  378.                             oldIDCounter += 1
  379.  
  380.                            
  381.                 # If the response is rated as 2 or 1
  382.                 elif thisOnTopic == 2 or thisOnTopic == 1:
  383.                     # Extract survivor number from response ID
  384.                     pinchasRE = re.compile(r'0')
  385.                     splitResponse = thisResponseID.split("-")
  386.  
  387.                     if pinchasRE.match(splitResponse[0]):
  388.                         survivorNumber = "0"
  389.                         #print("EES")
  390.                     else:
  391.                         survivorNumber = splitResponse[0]
  392.                     # Check if alternative responses are provided in survivor column
  393.                     if thisSheet[(letterDict[survivorNumber])+str(cell.row)].value:
  394.                         # Split cell value into list of individual IDs
  395.                         cellSplit = (thisSheet[(letterDict[survivorNumber])+str(cell.row)].value).split("\n")
  396.                         isOffTopic = "false"
  397.                         noGoodResponse = "false"
  398.                         OTResponseID = "ZZ"
  399.                         #print("YES")
  400.                         # Check each ID to see if it is Off Topic, if at least 1 ID is Off Topic set isOffTopic to true
  401.                         for ID in cellSplit:
  402.                             #print(ID)
  403.                             if ID in typeDict:
  404.                                 if typeDict[ID] in offTopicTypes:
  405.                                     isOffTopic = "true"
  406.                                     OTResponseID = ID
  407.                            
  408.                             elif noRE.match(ID.lower()):
  409.                                     print("found one")
  410.                                     noGoodResponse = "true"
  411.                                     noGoodResponseID = ID
  412.                                    
  413.                             else:
  414.                                 IDNotInDict += 1
  415.                                 gapCoverageSheet["J"+str(oldIDCounter)] = thisResponseID
  416.                                 oldIDCounter += 1
  417.  
  418.  
  419.                         # If isOffTopic is true, add question to off topic count, else add to on topic count
  420.                         if isOffTopic == "true":
  421.                             offTopicQuestions += 1
  422.                             # If User Text column has transcription, use this as question, else use value in asr column
  423.                             if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  424.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  425.                             else:
  426.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  427.                             gapCoverageSheet["B"+str(gapCoverageCounter)] = OTResponseID
  428.                             gapCoverageSheet["C"+str(gapCoverageCounter)] = typeDict[OTResponseID]
  429.                             gapCoverageSheet["D"+str(gapCoverageCounter)] = "annotation"
  430.                             gapCoverageCounter += 1
  431.  
  432.                         elif noGoodResponse == "true":
  433.                             print("yes")
  434.                             offTopicQuestions += 1
  435.                             # If User Text column has transcription, use this as question, else use value in asr column
  436.                             if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  437.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  438.                             else:
  439.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  440.                             gapCoverageSheet["B"+str(gapCoverageCounter)] = noGoodResponseID
  441.                             gapCoverageSheet["D"+str(gapCoverageCounter)] = "annotation"
  442.                             gapCoverageCounter += 1
  443.                        
  444.                         else:
  445.                             onTopicQuestions += 1
  446.  
  447.                  
  448.  
  449.                 # If rating is not 1-4, add as invalid rating                  
  450.                 else:
  451.                     if not thisSheet[offTopicColumn+str(cell.row)].value:
  452.                         gapCoverageSheet["K"+str(invalidCounter)] = thisOnTopic
  453.                         gapCoverageSheet["L"+str(invalidCounter)] = onTopicColumn+str(cell.row)
  454.                         invalidCounter += 1
  455.  
  456.                
  457.                 # If off topic rating is 5, check annotations to see if OT is annotated
  458.                 if thisOffTopic == 5:
  459.                     # Extract survivor number from response ID
  460.                     pinchasRE = re.compile(r'0')
  461.                     splitResponse = thisResponseID.split("-")
  462.  
  463.                     if pinchasRE.match(splitResponse[0]):
  464.                         survivorNumber = "0"
  465.                         #print("EES")
  466.                     else:
  467.                         survivorNumber = splitResponse[0]
  468.                     # Check if alternative responses are provided in survivor column
  469.                     if thisSheet[(letterDict[survivorNumber])+str(cell.row)].value:
  470.                         # Split cell value into list of individual IDs
  471.                         cellSplit = (thisSheet[(letterDict[survivorNumber])+str(cell.row)].value).split("\n")
  472.                         isOffTopic = "false"
  473.                         OTResponseID = "ZZ"
  474.                         #print("YES")
  475.                         # Check each ID to see if it is Off Topic, if at least 1 ID is Off Topic set isOffTopic to true
  476.                         for ID in cellSplit:
  477.                             if ID in typeDict:
  478.                                 if typeDict[ID] in offTopicTypes:
  479.                                     isOffTopic = "true"
  480.                                     OTResponseID = ID
  481.                             else:
  482.                                 IDNotInDict += 1
  483.                                 gapCoverageSheet["J"+str(oldIDCounter)] = thisResponseID
  484.                                 oldIDCounter += 1
  485.  
  486.  
  487.                         # If isOffTopic is true, add question to off topic count, else add to on topic count
  488.                         if isOffTopic == "true":
  489.                             offTopicQuestions += 1
  490.                             # If User Text column has transcription, use this as question, else use value in asr column
  491.                             if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  492.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  493.                             else:
  494.                                 gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  495.                             gapCoverageSheet["B"+str(gapCoverageCounter)] = OTResponseID
  496.                             gapCoverageSheet["C"+str(gapCoverageCounter)] = typeDict[OTResponseID]
  497.                             gapCoverageSheet["D"+str(gapCoverageCounter)] = "annotation"
  498.                             gapCoverageCounter += 1
  499.                         else:
  500.                             onTopicQuestions += 1
  501.                            
  502.                 # If off topic rating is 6, add to off topic count and output
  503.                 elif thisOffTopic == 6:
  504.                     offTopicQuestions += 1
  505.                     # If User Text column has transcription, use this as question, else use value in asr column
  506.                     if thisSheet[userTextColumn+str(cell.row)].value and not (thisSheet[userTextColumn+str(cell.row)].value).lower() == "check audio":
  507.                         gapCoverageSheet["A"+str(gapCoverageCounter)] = thisSheet[userTextColumn+str(cell.row)].value
  508.                     else:
  509.                         gapCoverageSheet["A"+str(gapCoverageCounter)] = cell.value
  510.                     gapCoverageSheet["B"+str(gapCoverageCounter)] = thisResponseID
  511.                     gapCoverageSheet["C"+str(gapCoverageCounter)] = typeDict[thisResponseID]
  512.                     gapCoverageSheet["D"+str(gapCoverageCounter)] = "response"
  513.                     gapCoverageCounter += 1
  514.  
  515.  
  516.                 else:
  517.                     if not thisSheet[onTopicColumn+str(cell.row)].value:
  518.                         gapCoverageSheet["K"+str(invalidCounter)] = thisOnTopic
  519.                         gapCoverageSheet["L"+str(invalidCounter)] = onTopicColumn+str(cell.row)
  520.                         invalidCounter += 1
  521.                
  522.                    
  523.                 if not thisSheet[onTopicColumn+str(cell.row)].value and not thisSheet[offTopicColumn+str(cell.row)].value:
  524.                     unrated += 1
  525.                    
  526.                    
  527.  
  528.     gapCoverageSheet["E2"] = totalQuestions
  529.     gapCoverageSheet["F2"] = offTopicQuestions
  530.     gapCoverageSheet["G2"] = onTopicQuestions
  531.     gapCoverageSheet["H2"] = IDNotInDict
  532.     gapCoverageSheet["I2"] = unrated
  533.  
  534.     #gapCoverage.save("C:\\Users\\CENSORED\\Desktop\\svn_ndt_all\\data-analysis\\alfred-testing-transcripts\\by-survivor\excel-version\\test-gap-coverage.xlsx")
  535.     gapCoverage.save("C:\\Users\\mello\\Desktop\\svn_ndt_all\\data-analysis\\alfred-testing-transcripts\\by-survivor\\excel-version\\test-gap-coverage.xlsx")
  536.                    
  537.                                        
  538.                
  539.  
  540.  
  541.  
  542. #assessCoverageGaps("helena-alfred-transcripts", "machine")
  543. assessCoverageGaps("sfa-test", "survivor")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement