icenine

Untitled

Oct 14th, 2013
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 15.25 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2.  
  3. use strict;
  4. use 5.010;
  5.  
  6. # Required for parsing NRLS data
  7. use Excel::Reader::XLSX;
  8. use DateTime::Format::Excel;
  9.  
  10. # Required for inputing data into Neo4j
  11. use REST::Neo4p;
  12.  
  13. use Data::Dumper;
  14. use Getopt::Long;
  15.  
  16. use Try::Tiny;
  17.  
  18. my $input_file;
  19. my $neo_uri = 'http://larva.med.ic.ac.uk:7474';
  20.  
  21. GetOptions (
  22.     "input_file=s"  => \$input_file,
  23.     "neo_uri=s"     => \$neo_uri,);
  24.  
  25. if (!defined($input_file)) {
  26.     die "Usage: $0 --input_file <xlsx file>";
  27. }
  28.  
  29. # Define required column headers
  30. my @required_columns = (
  31.     "IN07",             # Incident description
  32.     "PD09",             # Indident degree of harm
  33.     "RP07",             # NHS Trust
  34.     "IN05_lvl1",        # Incident category
  35.     "PD05_lvl1",        # Speciality
  36.     "PD02",             # Patient sex
  37.     "IN01",             # Incident date
  38.     "IN02_A_01",        # Incident hour
  39.     "Age_at_Incident",  # Patient age
  40.     "PD11",             # Patient ethnicity
  41.     "ST01_LVL1",        # Recorder staff type
  42.     "IN03_lvl1",        # Incident location
  43.     "incidentID",       # Unique incident ID
  44. );
  45.  
  46. my $reader = Excel::Reader::XLSX->new();
  47.  
  48. my $workbook = $reader->read_file($input_file) or
  49.     die ("Could not read $input_file.");
  50.  
  51. my $worksheet = ($workbook->worksheets())[0] or
  52.     die ("Failed to assign worksheet from workbook.");
  53.  
  54. my $column_header_row = $worksheet->next_row() or
  55.     die ("Could not get column header row.");
  56.  
  57. my %header_map;
  58.  
  59. # Find column positions for required columns
  60. while (my $cell = $column_header_row->next_cell()) {
  61.     my $cell_value = $cell->value();
  62.  
  63.     if (grep /$cell_value/, @required_columns) {
  64.         $header_map{$cell_value} = $cell->col();
  65.     }
  66. }
  67.  
  68. # Create connection to Neo4j
  69. REST::Neo4p->connect($neo_uri) or
  70.     die ("Could not connect to $neo_uri.");
  71.  
  72. my (%nodes, %relationships);
  73.  
  74. my $created_nhs_trust_node;
  75.  
  76. while (my $row = $worksheet->next_row) {
  77.     my @values = $row->values();
  78.     if (@values == 0) {
  79.         warn ("Row at ", $row->row_number() , " contains no values.");
  80.     }
  81.  
  82.     @values = map {
  83.         if ($_ =~ /^ ?$/ or !defined($_)) {
  84.             $_ = "NOT_DEFINED";
  85.         } else {
  86.             $_;
  87.         }
  88.     } @values;
  89.  
  90.     my ($nhs_trust, $nhs_trust_location, $degree_of_harm,
  91.         $patient_age, $patient_sex, $patient_ethnicity,
  92.         $incident_id, $incident_date,
  93.         $incident_hour, $incident_description,
  94.         $incident_category, $incident_reporter,
  95.         $incident_speciality);
  96.  
  97.     my ($last_known_query, $neo4p_query);
  98.  
  99.     try {
  100.         $nhs_trust =            $values[$header_map{"RP07"}];
  101.         $nhs_trust_location =   $values[$header_map{"IN03_lvl1"}];
  102.         $degree_of_harm =       $values[$header_map{"PD09"}];
  103.         $patient_age =          $values[$header_map{"Age_at_Incident"}];
  104.         $patient_sex =          $values[$header_map{"PD02"}];
  105.         $patient_ethnicity =    $values[$header_map{"PD11"}];
  106.         $incident_id =          $values[$header_map{"incidentID"}];
  107.         $incident_date =        $values[$header_map{"IN01"}];
  108.         $incident_hour =        $values[$header_map{"IN02_A_01"}];
  109.         $incident_description = $values[$header_map{"IN07"}];
  110.         $incident_category =    $values[$header_map{"IN05_lvl1"}];
  111.         $incident_reporter =    $values[$header_map{"ST01_LVL1"}];
  112.         $incident_speciality =  $values[$header_map{"PD05_lvl1"}];
  113.  
  114.         # Normalize double quotes in incident description
  115.         $incident_description =~ s/\\//g;
  116.         $incident_description =~ s/"/\\"/g;
  117.  
  118.         # Assign unique trust id just to ensure uniqueness on set
  119.         my $trust_incident_id = $incident_id . "_" . $nhs_trust;
  120.  
  121.         # Normalize incident hour
  122.         if ($incident_hour eq 'NOT_DEFINED') {
  123.             $incident_hour = '00';
  124.         } else {
  125.             $incident_hour = substr($incident_hour, 0, 2);
  126.         }
  127.  
  128.         # Normalize date
  129.         my $incident_timestamp = DateTime::Format::Excel
  130.             ->parse_datetime($incident_date)->ymd . "T" .
  131.             $incident_hour . ":00:00";
  132.  
  133.         # Build nodes
  134.  
  135.         # Create nodes
  136.        
  137.         # Create unique NHS Trust node
  138.         if (!defined($created_nhs_trust_node)) {
  139.             $last_known_query  =
  140.                 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  141.                 . "'RP07' CREATE UNIQUE "
  142.                 . '(n:NHS_TRUST { name : "' . $nhs_trust . '" })-[:HAS_NRLS_DATA_TYPE]->ndt '
  143.                 . 'RETURN n';
  144.             $created_nhs_trust_node = 1;
  145.  
  146.             $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  147.                 die ("Could not create neo4p query from $last_known_query.");
  148.      
  149.             $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  150.            
  151.             destroy_query ($neo4p_query);
  152.         }
  153.  
  154.         # Create unique NHS Trust Location node
  155.         $last_known_query =
  156.             'MATCH nt:NHS_TRUST WHERE nt.name = '
  157.             . "'${nhs_trust}'" . ' CREATE UNIQUE '
  158.             . '(ntl:NHS_TRUST_LOCATION { location_level_01 : "' . $nhs_trust_location
  159.             . '" })<-[:HAS_NHS_TRUST_LOCATION]-nt RETURN ntl';
  160.  
  161.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  162.             die ("Could not create neo4p query from $last_known_query.");
  163.  
  164.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  165.        
  166.         destroy_query ($neo4p_query);
  167.  
  168.         # Create unique Incident node
  169.         $last_known_query =
  170.             'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  171.             . "'IN07' CREATE UNIQUE "
  172.             . '(i:INCIDENT { incident_id : "' . $incident_id . '", '
  173.             . 'incident_timestamp : "' . $incident_timestamp . '", '
  174.             . 'incident_description : "' . $incident_description
  175.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN i';
  176.  
  177.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  178.             die ("Could not create neo4p query from $last_known_query.");
  179.  
  180.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  181.  
  182.         destroy_query ($neo4p_query);
  183.        
  184.         # Create relationship to NHS Trust
  185.         $last_known_query =
  186.             'MATCH n:NHS_TRUST, i:INCIDENT '
  187.             . 'WHERE n.name = '
  188.             . "'${nhs_trust}'"
  189.             . ' AND i.incident_id = '
  190.             . "'${incident_id}'" . ' CREATE UNIQUE '
  191.             . 'i-[r:IS_NHS_TRUST_INCIDENT]->n '
  192.             . 'RETURN r';
  193.  
  194.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  195.             die ("Could not create neo4p query from $last_known_query.");
  196.    
  197.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  198.  
  199.         undef ($neo4p_query);
  200.        
  201.         # Create relationship to NHS Trust Location
  202.         $last_known_query =
  203.             'MATCH ntl:NHS_TRUST_LOCATION, i:INCIDENT '
  204.             . 'WHERE ntl.location_level_01 = '
  205.             . "'${nhs_trust_location}'"
  206.             . ' AND i.incident_id = '
  207.             . "'${incident_id}'" . ' CREATE UNIQUE '
  208.             . 'i-[r:IS_NHS_TRUST_LOCATION_INCIDENT]->ntl '
  209.             . 'RETURN r';
  210.  
  211.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  212.             die ("Could not create neo4p query from $last_known_query.");
  213.    
  214.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  215.        
  216.         destroy_query ($neo4p_query);
  217.        
  218.         # Create unique Incident Category node
  219.         $last_known_query =
  220.             'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  221.             . "'IN05_lvl1' CREATE UNIQUE "
  222.             . '(ic:INCIDENT_CATEGORY { category_level_01 : "'
  223.             . $incident_category
  224.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ic';
  225.  
  226.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  227.             die ("Could not create neo4p query from $last_known_query.");
  228.  
  229.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  230.  
  231.         destroy_query ($neo4p_query);
  232.        
  233.         # Create unique Incident Category relationship
  234.         $last_known_query =
  235.             'MATCH i:INCIDENT, ic:INCIDENT_CATEGORY '
  236.             . 'WHERE i.incident_id = '
  237.             . "'${incident_id}'" . ' '
  238.             . 'AND ic.category_level_01 = '
  239.             . "'${incident_category}'"
  240.             . ' CREATE UNIQUE '
  241.             . 'ic<-[r:HAS_INCIDENT_CATEGORY]-i RETURN r';
  242.  
  243.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  244.             die ("Could not create neo4p query from $last_known_query.");
  245.    
  246.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  247.        
  248.         destroy_query ($neo4p_query);
  249.        
  250.         # Create unique Incident Speciality node
  251.         $last_known_query =
  252.             'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  253.             . "'PD05_lvl1' CREATE UNIQUE "
  254.             . '(is:INCIDENT_SPECIALITY { speciality_level_01 : "'
  255.             . $incident_speciality
  256.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN is';
  257.  
  258.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  259.             die ("Could not create neo4p query from $last_known_query.");
  260.  
  261.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  262.  
  263.         destroy_query ($neo4p_query);
  264.        
  265.         # Create unique Incident Speciality relationship
  266.         $last_known_query =
  267.             'MATCH i:INCIDENT, is:INCIDENT_SPECIALITY '
  268.             . 'WHERE i.incident_id = '
  269.             . "'${incident_id}'" . ' '
  270.             . 'AND is.speciality_level_01 = '
  271.             . "'${incident_speciality}'"
  272.             . ' CREATE UNIQUE '
  273.             . 'is<-[r:HAS_INCIDENT_SPECIALITY]-i RETURN r';
  274.  
  275.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  276.             die ("Could not create neo4p query from $last_known_query.");
  277.    
  278.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  279.        
  280.         destroy_query ($neo4p_query);
  281.        
  282.         # Create unique Incident Reporter node
  283.         $last_known_query =
  284.             'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  285.             . "'ST01_LVL1' CREATE UNIQUE "
  286.             . '(ir:INCIDENT_REPORTER { reporter_level_01 : "'
  287.             . $incident_reporter
  288.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ir';
  289.  
  290.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  291.             die ("Could not create neo4p query from $last_known_query.");
  292.  
  293.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  294.  
  295.         destroy_query ($neo4p_query);
  296.        
  297.         # Create unique Incident Reporter relationship
  298.         $last_known_query =
  299.             'MATCH i:INCIDENT, ir:INCIDENT_REPORTER '
  300.             . 'WHERE i.incident_id = '
  301.             . "'${incident_id}'" . ' '
  302.             . 'AND ir.reporter_level_01 = '
  303.             . "'${incident_reporter}'"
  304.             . ' CREATE UNIQUE '
  305.             . 'ir<-[r:HAS_INCIDENT_REPORTER]-i RETURN r';
  306.  
  307.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  308.             die ("Could not create neo4p query from $last_known_query.");
  309.    
  310.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  311.  
  312.         destroy_query ($neo4p_query);
  313.        
  314.         # Create unique Patient node
  315.         $last_known_query =
  316.             'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  317.             . "'Age_at_Incident' CREATE UNIQUE "
  318.             . '(p:PATIENT { patient_age : "'
  319.             . $patient_age
  320.             . '", patient_sex : "'
  321.             . $patient_sex
  322.             . '", patient_ethnicity : "'
  323.             . $patient_ethnicity
  324.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN p';
  325.  
  326.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  327.             die ("Could not create neo4p query from $last_known_query.");
  328.    
  329.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  330.  
  331.         destroy_query ($neo4p_query);
  332.        
  333.         # Create unique Patient relationship
  334.         $last_known_query =
  335.             'MATCH i:INCIDENT, p:PATIENT '
  336.             . 'WHERE i.incident_id = '
  337.             . "'${incident_id}'" . ' '
  338.             . 'AND p.patient_age = '
  339.             . "'${patient_age}'" . ' '
  340.             . 'AND p.patient_sex = '
  341.             . "'${patient_sex}'" . ' '
  342.             . 'AND p.patient_ethnicity = '
  343.             . "'${patient_ethnicity}'"
  344.             . ' CREATE UNIQUE '
  345.             . ' p<-[r:HAS_INCIDENT_PATIENT]-i RETURN r';
  346.  
  347.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  348.             die ("Could not create neo4p query from $last_known_query.");
  349.    
  350.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  351.  
  352.         destroy_query ($neo4p_query);
  353.        
  354.         # Create unique Degree Of Harm node
  355.         $last_known_query =
  356.             'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
  357.             . "'PD09' CREATE UNIQUE "
  358.             . '(doh:DEGREE_OF_HARM { degree_of_harm : "'
  359.             . $degree_of_harm
  360.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN doh';
  361.  
  362.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  363.             die ("Could not create neo4p query from $last_known_query.");
  364.    
  365.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  366.  
  367.         destroy_query ($neo4p_query);
  368.        
  369.         # Create unique Degree Of Harm relationship
  370.         $last_known_query =
  371.             'MATCH i:INCIDENT, doh:DEGREE_OF_HARM '
  372.             . 'WHERE i.incident_id = '
  373.             . "'${incident_id}'" . ' '
  374.             . 'AND doh.degree_of_harm = '
  375.             . "'${degree_of_harm}'"
  376.             . ' CREATE UNIQUE '
  377.             . ' doh<-[r:HAS_INCIDENT_DEGREE_OF_HARM]-i RETURN r';
  378.  
  379.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  380.             die ("Could not create neo4p query from $last_known_query.");
  381.    
  382.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  383.  
  384.         destroy_query ($neo4p_query);        
  385.        
  386.     } catch {
  387.         my $current_row_number = $row->row_number() + 1;
  388.         say STDERR "Failure at row number $current_row_number in $input_file : $_.";
  389.         say STDERR "Last known query $last_known_query."
  390.             if (defined($last_known_query));
  391.         if (defined($neo4p_query) and defined($neo4p_query->errstr)) {
  392.             say STDERR "Last known neo4p query error: " , $neo4p_query->errstr;
  393.         }
  394.  
  395.         foreach my $required_column (@required_columns) {
  396.             if (defined($header_map{$required_column}) and
  397.                 defined($values[$header_map{$required_column}])) {
  398.                 say STDERR "Column $required_column : " , $values[$header_map{$required_column}];
  399.             }
  400.         }
  401.     };
  402.  
  403.     unless ($row->row_number() % 500) {
  404.         say "Processed " , $row->row_number() , " rows @ " , scalar localtime;
  405.     }
  406. }
  407.  
  408. sub destroy_query {
  409.     my $query = shift;
  410.    
  411.     if (defined($query)) {
  412.         while (my $response = $query->fetch()) {
  413.             # Who cares, throw it out
  414.         }
  415.         undef $query; # Destroy it
  416.     }
  417. }
Advertisement
Add Comment
Please, Sign In to add comment