icenine

Extract without destroy_query REST::Neo4p 0.126

Dec 18th, 2013
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 17.43 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2.  
  3. use strict;
  4. use 5.010;
  5.  
  6. # Required for parsing NRLS data
  7. use Excel::Reader::XLSX;
  8. use DateTime::Format::Excel;
  9.  
  10. # Required for inputing data into Neo4j
  11. use REST::Neo4p;
  12.  
  13. use Data::Dumper;
  14. use Getopt::Long;
  15.  
  16. use Try::Tiny;
  17.  
  18. $| = 1;
  19.  
  20. my $input_file;
  21. my $neo_uri = 'http://larva.med.ic.ac.uk:7474';
  22.  
  23. GetOptions (
  24.     "input_file=s"  => \$input_file,
  25.     "neo_uri=s"     => \$neo_uri,);
  26.  
  27. if (!defined($input_file)) {
  28.     die "Usage: $0 --input_file <xlsx file>";
  29. }
  30.  
  31. # Define required column headers
  32. my @required_columns = (
  33.     "IN07",             # Incident description
  34.     "PD09",             # Indident degree of harm
  35.     "RP07",             # NHS Trust
  36.     "IN05_lvl1",        # Incident category
  37.     "PD05_lvl1",        # Speciality
  38.     "PD02",             # Patient sex
  39.     "IN01",             # Incident date
  40.     "IN02_A_01",        # Incident hour
  41.     "Age_at_Incident",  # Patient age
  42.     "PD11",             # Patient ethnicity
  43.     "ST01_LVL1",        # Recorder staff type
  44.     "IN03_lvl1",        # Incident location
  45.     "incidentID",       # Unique incident ID
  46. );
  47.  
  48. my $reader = Excel::Reader::XLSX->new();
  49.  
  50. my $workbook = $reader->read_file($input_file) or
  51.     die ("Could not read $input_file.");
  52.  
  53. my $worksheet = ($workbook->worksheets())[0] or
  54.     die ("Failed to assign worksheet from workbook.");
  55.  
  56. my $column_header_row = $worksheet->next_row() or
  57.     die ("Could not get column header row.");
  58.  
  59. my %header_map;
  60.  
  61. # Find column positions for required columns
  62. while (my $cell = $column_header_row->next_cell()) {
  63.     my $cell_value = $cell->value();
  64.  
  65.     if (grep /$cell_value/, @required_columns) {
  66.         $header_map{$cell_value} = $cell->col();
  67.     }
  68. }
  69.  
  70. # Create connection to Neo4j
  71. REST::Neo4p->connect($neo_uri) or
  72.     die ("Could not connect to $neo_uri.");
  73.  
  74. my (%nodes, %relationships, %created_nhs_trust_location);
  75.  
  76. my $created_nhs_trust_node;
  77.  
  78. while (my $row = $worksheet->next_row) {
  79.     my @values = $row->values();
  80.     if (@values == 0) {
  81.         warn ("Row at ", $row->row_number() , " contains no values.");
  82.     }
  83.  
  84.     @values = map {
  85.         if ($_ =~ /^ ?$/ or !defined($_)) {
  86.             $_ = "NOT_DEFINED";
  87.         } else {
  88.             $_;
  89.         }
  90.     } @values;
  91.  
  92.     my ($nhs_trust, $nhs_trust_location, $degree_of_harm,
  93.         $patient_age, $patient_sex, $patient_ethnicity,
  94.         $incident_id, $incident_date,
  95.         $incident_hour, $incident_description,
  96.         $incident_category, $incident_reporter,
  97.         $incident_speciality);
  98.  
  99.     my ($last_known_query, $neo4p_query);
  100.  
  101.     try {
  102.         $nhs_trust =            $values[$header_map{"RP07"}];
  103.         $nhs_trust_location =   $values[$header_map{"IN03_lvl1"}];
  104.         $degree_of_harm =       $values[$header_map{"PD09"}];
  105.         $patient_age =          $values[$header_map{"Age_at_Incident"}];
  106.         $patient_sex =          $values[$header_map{"PD02"}];
  107.         $patient_ethnicity =    $values[$header_map{"PD11"}];
  108.         $incident_id =          $values[$header_map{"incidentID"}];
  109.         $incident_date =        $values[$header_map{"IN01"}];
  110.         $incident_hour =        $values[$header_map{"IN02_A_01"}];
  111.         $incident_description = $values[$header_map{"IN07"}];
  112.         $incident_category =    $values[$header_map{"IN05_lvl1"}];
  113.         $incident_reporter =    $values[$header_map{"ST01_LVL1"}];
  114.         $incident_speciality =  $values[$header_map{"PD05_lvl1"}];
  115.  
  116.         # Normalize double quotes in incident description
  117.         $incident_description =~ s/"/\\"/g;
  118.  
  119.         # Assign unique trust id just to ensure uniqueness on set
  120.         my $trust_incident_id = $incident_id . "_" . $nhs_trust;
  121.  
  122.         # Normalize incident hour
  123.         if ($incident_hour eq 'NOT_DEFINED') {
  124.             $incident_hour = '00';
  125.         } else {
  126.             $incident_hour = substr($incident_hour, 0, 2);
  127.         }
  128.  
  129.         # Normalize date
  130.         my $incident_timestamp = DateTime::Format::Excel
  131.             ->parse_datetime($incident_date)->ymd . "T" .
  132.             $incident_hour . ":00:00";
  133.  
  134.         # Build nodes
  135.  
  136.         # Create nodes
  137.        
  138.         # Create unique NHS Trust node
  139.         if (!defined($created_nhs_trust_node) or $created_nhs_trust_node ne 1) {
  140.             $last_known_query  =
  141.                 'MATCH (ndt:NRLS_DATA_TYPE)'
  142.                 . ' WHERE'
  143.                 . ' ndt.code = {code}'
  144.                 . ' CREATE UNIQUE'
  145.                 . ' (n:NHS_TRUST { name : {name} })-[:HAS_NRLS_DATA_TYPE]->(ndt)'
  146.                 . ' RETURN n';
  147.                      
  148.             $created_nhs_trust_node = 1;
  149.  
  150.             $neo4p_query = REST::Neo4p::Query->new($last_known_query
  151.                 , { "code" => "RP07", "name" => $nhs_trust }) or
  152.                 die ("Could not create neo4p query from $last_known_query.");
  153.      
  154.             $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  155.            
  156.         }
  157.  
  158.         if (!defined($created_nhs_trust_location{$nhs_trust_location})) {
  159.             # Create unique NHS Trust Location node
  160.             $last_known_query =
  161.                 'MERGE (ntl:NHS_TRUST_LOCATION'
  162.                 . ' { location_level_01 : { location_level_01 } }) RETURN ntl';
  163.      
  164.             $neo4p_query = REST::Neo4p::Query->new($last_known_query
  165.                 , { "location_level_01" => $nhs_trust_location }) or
  166.                 die ("Could not create neo4p query from $last_known_query.");
  167.      
  168.             $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  169.            
  170.            
  171.             # Create relationship between NHS Trust/Location    
  172.             $last_known_query =
  173.                 'MATCH (nt:NHS_TRUST), (ntl:NHS_TRUST_LOCATION)'
  174.                 . ' WHERE nt.name = { name }'
  175.                 . ' AND ntl.location_level_01 = { location_level_01 }'
  176.                 . ' CREATE UNIQUE'
  177.                 . ' (ntl)<-[hntl:HAS_NHS_TRUST_LOCATION]-nt RETURN hntl';
  178.      
  179.             $neo4p_query = REST::Neo4p::Query->new($last_known_query
  180.                 , { "name" => $nhs_trust, "location_level_01" => $nhs_trust_location }) or
  181.                 die ("Could not create neo4p query from $last_known_query.");
  182.      
  183.             $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  184.            
  185.            
  186.             $created_nhs_trust_location{$nhs_trust_location} = 1;
  187.         }
  188.        
  189.         # Create unique Incident node (IN07) ${nhs_trust}
  190.         $last_known_query =
  191.             'MERGE (i:INCIDENT { incident_id : { incident_id } })'
  192.             . ' ON CREATE SET i.incident_timestamp = { incident_timestamp },'
  193.             . ' i.incident_description = { incident_description }'
  194.             . ' RETURN i';
  195.        
  196.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  197.             , { "incident_id" => $incident_id
  198.                         , "incident_timestamp" => $incident_timestamp
  199.                         , "incident_description" => $incident_description }) or
  200.                         die ("Could not create neo4p query from $last_known_query.");
  201.  
  202.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  203.  
  204.  
  205.         # Create unique Incident node NRLS_DATA_TYPE relationship
  206.         $last_known_query =
  207.             'MATCH (i:INCIDENT), (ndt:NRLS_DATA_TYPE) '
  208.             . ' WHERE i.incident_id = { incident_id } '
  209.             . ' AND ndt.code = { code } '
  210.             . ' CREATE UNIQUE'
  211.             . ' (i)-[hndt:HAS_NRLS_DATA_TYPE]->ndt RETURN hndt';
  212.  
  213.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  214.                     , { "incident_id" => $incident_id, "code" => "IN07" }) or
  215.             die ("Could not create neo4p query from $last_known_query.");
  216.  
  217.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  218.  
  219.        
  220.         # Create relationship to NHS Trust ${nhs_trust} ${incident_id}
  221.         $last_known_query =
  222.             'MATCH (n:NHS_TRUST), (i:INCIDENT)'
  223.             . ' WHERE n.name = { name }'
  224.             . ' AND i.incident_id = { incident_id }'
  225.             . ' CREATE UNIQUE '
  226.             . ' i-[r:IS_NHS_TRUST_INCIDENT]->n '
  227.             . ' RETURN r';
  228.  
  229.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  230.             , { "name" => $nhs_trust, "incident_id" => $incident_id }) or
  231.             die ("Could not create neo4p query from $last_known_query.");
  232.    
  233.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  234.  
  235.         undef ($neo4p_query);
  236.        
  237.         # Create relationship to NHS Trust Location ${nhs_trust_location} ${incident_id}
  238.         $last_known_query =
  239.             'MATCH (ntl:NHS_TRUST_LOCATION), (i:INCIDENT)'
  240.             . ' WHERE ntl.location_level_01 = { location_level_01 }'
  241.             . ' AND i.incident_id = { incident_id }'
  242.             . ' CREATE UNIQUE '
  243.             . ' i-[r:IS_NHS_TRUST_LOCATION_INCIDENT]->ntl '
  244.             . ' RETURN r';
  245.  
  246.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  247.             , { "location_level_01" => $nhs_trust_location
  248.             , "incident_id" => $incident_id }) or
  249.             die ("Could not create neo4p query from $last_known_query.");
  250.    
  251.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  252.        
  253.        
  254.         # Create unique Incident Category node
  255.         $last_known_query =
  256.             'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = '
  257.             . ' { code } CREATE UNIQUE'
  258.             . ' (ic:INCIDENT_CATEGORY { category_level_01 : { category_level_01 } '
  259.             . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ic';
  260.  
  261.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  262.                         , { "code" => "IN05_lvl1", "category_level_01" => $incident_category }) or
  263.             die ("Could not create neo4p query from $last_known_query.");
  264.  
  265.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  266.  
  267.        
  268.         # Create unique Incident Category relationship
  269.         $last_known_query =
  270.             'MATCH (i:INCIDENT), (ic:INCIDENT_CATEGORY)'
  271.             . ' WHERE i.incident_id = { incident_id }'
  272.             . ' AND ic.category_level_01 = { category_level_01 }'
  273.             . ' CREATE UNIQUE'
  274.             . ' ic<-[r:HAS_INCIDENT_CATEGORY]-i RETURN r';
  275.  
  276.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  277.             , { "incident_id" => $incident_id
  278.             , "category_level_01" => $incident_category }) or
  279.             die ("Could not create neo4p query from $last_known_query.");
  280.    
  281.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  282.        
  283.        
  284.         # Create unique Incident Speciality node PD05_lvl1
  285.         $last_known_query =
  286.             'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = { code }'
  287.             . ' CREATE UNIQUE'
  288.             . ' (is:INCIDENT_SPECIALITY { speciality_level_01 : { speciality_level_01 }'
  289.             . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN is';
  290.  
  291.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  292.             , { "code" => "PD05_lvl1", "speciality_level_01" => $incident_speciality }) or
  293.             die ("Could not create neo4p query from $last_known_query.");
  294.  
  295.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  296.  
  297.        
  298.         # Create unique Incident Speciality relationship incident_id incident_speciality
  299.         $last_known_query =
  300.             'MATCH (i:INCIDENT), (is:INCIDENT_SPECIALITY)'
  301.             . ' WHERE i.incident_id = { incident_id }'
  302.             . ' AND is.speciality_level_01 = { speciality_level_01 }'
  303.             . ' CREATE UNIQUE '
  304.             . ' is<-[r:HAS_INCIDENT_SPECIALITY]-i RETURN r';
  305.  
  306.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  307.             , { "incident_id" => $incident_id
  308.             , "speciality_level_01" => $incident_speciality }) or
  309.             die ("Could not create neo4p query from $last_known_query.");
  310.    
  311.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  312.        
  313.        
  314.         # Create unique Incident Reporter node ST01_LVL1
  315.         $last_known_query =
  316.             'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code ='
  317.             . ' { code } CREATE UNIQUE '
  318.             . ' (ir:INCIDENT_REPORTER { reporter_level_01 : { reporter_level_01 }'
  319.             . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ir';
  320.  
  321.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  322.             , { "code" => "ST01_LVL1"
  323.             , "reporter_level_01" => $incident_reporter }) or
  324.             die ("Could not create neo4p query from $last_known_query.");
  325.  
  326.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  327.  
  328.        
  329.         # Create unique Incident Reporter relationship
  330.         $last_known_query =
  331.             'MATCH (i:INCIDENT), (ir:INCIDENT_REPORTER)'
  332.             . ' WHERE i.incident_id = { incident_id }'
  333.             . ' AND ir.reporter_level_01 = { incident_reporter }'
  334.             . ' CREATE UNIQUE '
  335.             . ' ir<-[r:HAS_INCIDENT_REPORTER]-i RETURN r';
  336.  
  337.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  338.             , { "incident_id" => $incident_id
  339.             , "incident_reporter" => $incident_reporter }) or
  340.             die ("Could not create neo4p query from $last_known_query.");
  341.    
  342.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  343.  
  344.        
  345.         # Create unique Patient node
  346.         $last_known_query =
  347.             'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = '
  348.             . "'Age_at_Incident' CREATE UNIQUE "
  349.             . '(p:PATIENT { patient_age : "'
  350.             . $patient_age
  351.             . '", patient_sex : "'
  352.             . $patient_sex
  353.             . '", patient_ethnicity : "'
  354.             . $patient_ethnicity
  355.             . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN p';
  356.  
  357.         $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
  358.             die ("Could not create neo4p query from $last_known_query.");
  359.    
  360.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  361.  
  362.        
  363.         # Create unique Patient relationship
  364.         $last_known_query =
  365.             'MATCH (i:INCIDENT), (p:PATIENT) '
  366.             . ' WHERE i.incident_id = { incident_id }'
  367.             . ' AND p.patient_age = { patient_age }'
  368.             . ' AND p.patient_sex = { patient_sex }'
  369.             . ' AND p.patient_ethnicity = { patient_ethnicity }'
  370.             . ' CREATE UNIQUE '
  371.             . ' p<-[r:HAS_INCIDENT_PATIENT]-i RETURN r';
  372.  
  373.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  374.             , { "incident_id" => $incident_id
  375.             , "patient_age" => $patient_age
  376.             , "patient_sex" => $patient_sex
  377.             , "patient_ethnicity" => $patient_ethnicity }) or
  378.             die ("Could not create neo4p query from $last_known_query.");
  379.    
  380.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  381.  
  382.        
  383.         # Create unique Degree Of Harm node
  384.         $last_known_query =
  385.             'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = { code }'
  386.             . ' CREATE UNIQUE'
  387.             . ' (doh:DEGREE_OF_HARM { degree_of_harm : { degree_of_harm }'
  388.             . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN doh';
  389.  
  390.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  391.             , { "code" => "PD09"
  392.             , "degree_of_harm" => $degree_of_harm }) or
  393.             die ("Could not create neo4p query from $last_known_query.");
  394.    
  395.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  396.  
  397.        
  398.         # Create unique Degree Of Harm relationship
  399.         $last_known_query =
  400.             'MATCH (i:INCIDENT), (doh:DEGREE_OF_HARM)'
  401.             . ' WHERE i.incident_id = { incident_id }'
  402.             . ' AND doh.degree_of_harm = { degree_of_harm }'
  403.             . ' CREATE UNIQUE '
  404.             . ' doh<-[r:HAS_INCIDENT_DEGREE_OF_HARM]-i RETURN r';
  405.  
  406.         $neo4p_query = REST::Neo4p::Query->new($last_known_query
  407.             , { "incident_id" => $incident_id
  408.             , "degree_of_harm" => $degree_of_harm}) or
  409.             die ("Could not create neo4p query from $last_known_query.");
  410.    
  411.         $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
  412.  
  413.        
  414.     } catch {
  415.         my $current_row_number = $row->row_number() + 1;
  416.         say STDERR "Failure at row number $current_row_number in $input_file : $_.";
  417.         say STDERR "Last known query $last_known_query."
  418.             if (defined($last_known_query));
  419.         if (defined($neo4p_query) and defined($neo4p_query->errstr)) {
  420.             say STDERR "Last known neo4p query error: " , $neo4p_query->errstr;
  421.         }
  422.  
  423.         foreach my $required_column (@required_columns) {
  424.             if (defined($header_map{$required_column}) and
  425.                 defined($values[$header_map{$required_column}])) {
  426.                 say STDERR "Column $required_column : " , $values[$header_map{$required_column}];
  427.             }
  428.         }
  429.     };
  430.  
  431.     unless ($row->row_number() % 500) {
  432.         say "Processed " , $row->row_number() , " rows. " , scalar localtime;
  433.     }
  434. }
  435.  
  436. sub destroy_query {
  437.     my $query = shift;
  438.    
  439.     if (defined($query)) {
  440.         while (my $response = $query->fetch()) {
  441.             # Who cares, throw it out
  442.         }
  443.         undef $query; # Destroy it
  444.     }
  445. }
Advertisement
Add Comment
Please, Sign In to add comment