Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl -w
- use strict;
- use 5.010;
- # Required for parsing NRLS data
- use Excel::Reader::XLSX;
- use DateTime::Format::Excel;
- # Required for inputing data into Neo4j
- use REST::Neo4p;
- use Data::Dumper;
- use Getopt::Long;
- use Try::Tiny;
- $| = 1;
- my $input_file;
- my $neo_uri = 'http://larva.med.ic.ac.uk:7474';
- GetOptions (
- "input_file=s" => \$input_file,
- "neo_uri=s" => \$neo_uri,);
- if (!defined($input_file)) {
- die "Usage: $0 --input_file <xlsx file>";
- }
- # Define required column headers
- my @required_columns = (
- "IN07", # Incident description
- "PD09", # Indident degree of harm
- "RP07", # NHS Trust
- "IN05_lvl1", # Incident category
- "PD05_lvl1", # Speciality
- "PD02", # Patient sex
- "IN01", # Incident date
- "IN02_A_01", # Incident hour
- "Age_at_Incident", # Patient age
- "PD11", # Patient ethnicity
- "ST01_LVL1", # Recorder staff type
- "IN03_lvl1", # Incident location
- "incidentID", # Unique incident ID
- );
- my $reader = Excel::Reader::XLSX->new();
- my $workbook = $reader->read_file($input_file) or
- die ("Could not read $input_file.");
- my $worksheet = ($workbook->worksheets())[0] or
- die ("Failed to assign worksheet from workbook.");
- my $column_header_row = $worksheet->next_row() or
- die ("Could not get column header row.");
- my %header_map;
- # Find column positions for required columns
- while (my $cell = $column_header_row->next_cell()) {
- my $cell_value = $cell->value();
- if (grep /$cell_value/, @required_columns) {
- $header_map{$cell_value} = $cell->col();
- }
- }
- # Create connection to Neo4j
- REST::Neo4p->connect($neo_uri) or
- die ("Could not connect to $neo_uri.");
- my (%nodes, %relationships, %created_nhs_trust_location);
- my $created_nhs_trust_node;
- while (my $row = $worksheet->next_row) {
- my @values = $row->values();
- if (@values == 0) {
- warn ("Row at ", $row->row_number() , " contains no values.");
- }
- @values = map {
- if ($_ =~ /^ ?$/ or !defined($_)) {
- $_ = "NOT_DEFINED";
- } else {
- $_;
- }
- } @values;
- my ($nhs_trust, $nhs_trust_location, $degree_of_harm,
- $patient_age, $patient_sex, $patient_ethnicity,
- $incident_id, $incident_date,
- $incident_hour, $incident_description,
- $incident_category, $incident_reporter,
- $incident_speciality);
- my ($last_known_query, $neo4p_query);
- try {
- $nhs_trust = $values[$header_map{"RP07"}];
- $nhs_trust_location = $values[$header_map{"IN03_lvl1"}];
- $degree_of_harm = $values[$header_map{"PD09"}];
- $patient_age = $values[$header_map{"Age_at_Incident"}];
- $patient_sex = $values[$header_map{"PD02"}];
- $patient_ethnicity = $values[$header_map{"PD11"}];
- $incident_id = $values[$header_map{"incidentID"}];
- $incident_date = $values[$header_map{"IN01"}];
- $incident_hour = $values[$header_map{"IN02_A_01"}];
- $incident_description = $values[$header_map{"IN07"}];
- $incident_category = $values[$header_map{"IN05_lvl1"}];
- $incident_reporter = $values[$header_map{"ST01_LVL1"}];
- $incident_speciality = $values[$header_map{"PD05_lvl1"}];
- # Normalize double quotes in incident description
- $incident_description =~ s/"/\\"/g;
- # Assign unique trust id just to ensure uniqueness on set
- my $trust_incident_id = $incident_id . "_" . $nhs_trust;
- # Normalize incident hour
- if ($incident_hour eq 'NOT_DEFINED') {
- $incident_hour = '00';
- } else {
- $incident_hour = substr($incident_hour, 0, 2);
- }
- # Normalize date
- my $incident_timestamp = DateTime::Format::Excel
- ->parse_datetime($incident_date)->ymd . "T" .
- $incident_hour . ":00:00";
- # Build nodes
- # Create nodes
- # Create unique NHS Trust node
- if (!defined($created_nhs_trust_node)) {
- $last_known_query =
- 'MATCH (ndt:NRLS_DATA_TYPE)'
- . ' WHERE'
- . ' ndt.code = {code}'
- . ' CREATE UNIQUE'
- . ' (n:NHS_TRUST { name : {name} })-[:HAS_NRLS_DATA_TYPE]->(ndt)'
- . ' RETURN n';
- $created_nhs_trust_node = 1;
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "code" => "RP07", "name" => $nhs_trust }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- }
- if (!defined($created_nhs_trust_location{$nhs_trust_location})) {
- # Create unique NHS Trust Location node
- $last_known_query =
- 'MERGE (ntl:NHS_TRUST_LOCATION'
- . ' { location_level_01 : { location_level_01 } }) RETURN ntl';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "location_level_01" => $nhs_trust_location }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create relationship between NHS Trust/Location
- $last_known_query =
- 'MATCH (nt:NHS_TRUST), (ntl:NHS_TRUST_LOCATION)'
- . ' WHERE nt.name = { name }'
- . ' AND ntl.location_level_01 = { location_level_01 }'
- . ' CREATE UNIQUE'
- . ' (ntl)<-[hntl:HAS_NHS_TRUST_LOCATION]-nt RETURN hntl';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "name" => $nhs_trust, "location_level_01" => $nhs_trust_location }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- $created_nhs_trust_location{$nhs_trust_location} = 1;
- }
- # Create unique Incident node (IN07) ${nhs_trust}
- $last_known_query =
- 'MERGE (i:INCIDENT { incident_id : { incident_id } })'
- . ' ON CREATE SET i.incident_timestamp = { incident_timestamp },'
- . ' i.incident_description = { incident_description }'
- . ' RETURN i';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id
- , "incident_timestamp" => $incident_timestamp
- , "incident_description" => $incident_description }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident node NRLS_DATA_TYPE relationship
- $last_known_query =
- 'MATCH (i:INCIDENT), (ndt:NRLS_DATA_TYPE) '
- . ' WHERE i.incident_id = { incident_id } '
- . ' AND ndt.code = { code } '
- . ' CREATE UNIQUE'
- . ' (i)-[hndt:HAS_NRLS_DATA_TYPE]->ndt RETURN hndt';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id, "code" => "IN07" }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create relationship to NHS Trust ${nhs_trust} ${incident_id}
- $last_known_query =
- 'MATCH (n:NHS_TRUST), (i:INCIDENT)'
- . ' WHERE n.name = { name }'
- . ' AND i.incident_id = { incident_id }'
- . ' CREATE UNIQUE '
- . ' i-[r:IS_NHS_TRUST_INCIDENT]->n '
- . ' RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "name" => $nhs_trust, "incident_id" => $incident_id }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- undef ($neo4p_query);
- # Create relationship to NHS Trust Location ${nhs_trust_location} ${incident_id}
- $last_known_query =
- 'MATCH (ntl:NHS_TRUST_LOCATION), (i:INCIDENT)'
- . ' WHERE ntl.location_level_01 = { location_level_01 }'
- . ' AND i.incident_id = { incident_id }'
- . ' CREATE UNIQUE '
- . ' i-[r:IS_NHS_TRUST_LOCATION_INCIDENT]->ntl '
- . ' RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "location_level_01" => $nhs_trust_location
- , "incident_id" => $incident_id }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Category node
- $last_known_query =
- 'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = '
- . ' { code } CREATE UNIQUE'
- . ' (ic:INCIDENT_CATEGORY { category_level_01 : { category_level_01 } '
- . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ic';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "code" => "IN05_lvl1", "category_level_01" => $incident_category }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Category relationship
- $last_known_query =
- 'MATCH (i:INCIDENT), (ic:INCIDENT_CATEGORY)'
- . ' WHERE i.incident_id = { incident_id }'
- . ' AND ic.category_level_01 = { category_level_01 }'
- . ' CREATE UNIQUE'
- . ' ic<-[r:HAS_INCIDENT_CATEGORY]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id
- , "category_level_01" => $incident_category }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Speciality node PD05_lvl1
- $last_known_query =
- 'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = { code }'
- . ' CREATE UNIQUE'
- . ' (is:INCIDENT_SPECIALITY { speciality_level_01 : { speciality_level_01 }'
- . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN is';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "code" => "PD05_lvl1", "speciality_level_01" => $incident_speciality }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Speciality relationship incident_id incident_speciality
- $last_known_query =
- 'MATCH (i:INCIDENT), (is:INCIDENT_SPECIALITY)'
- . ' WHERE i.incident_id = { incident_id }'
- . ' AND is.speciality_level_01 = { speciality_level_01 }'
- . ' CREATE UNIQUE '
- . ' is<-[r:HAS_INCIDENT_SPECIALITY]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id
- , "speciality_level_01" => $incident_speciality }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Reporter node ST01_LVL1
- $last_known_query =
- 'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code ='
- . ' { code } CREATE UNIQUE '
- . ' (ir:INCIDENT_REPORTER { reporter_level_01 : { reporter_level_01 }'
- . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ir';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "code" => "ST01_LVL1"
- , "reporter_level_01" => $incident_reporter }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Reporter relationship
- $last_known_query =
- 'MATCH (i:INCIDENT), (ir:INCIDENT_REPORTER)'
- . ' WHERE i.incident_id = { incident_id }'
- . ' AND ir.reporter_level_01 = { incident_reporter }'
- . ' CREATE UNIQUE '
- . ' ir<-[r:HAS_INCIDENT_REPORTER]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id
- , "incident_reporter" => $incident_reporter }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Patient node
- $last_known_query =
- 'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = '
- . "'Age_at_Incident' CREATE UNIQUE "
- . '(p:PATIENT { patient_age : "'
- . $patient_age
- . '", patient_sex : "'
- . $patient_sex
- . '", patient_ethnicity : "'
- . $patient_ethnicity
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN p';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Patient relationship
- $last_known_query =
- 'MATCH (i:INCIDENT), (p:PATIENT) '
- . ' WHERE i.incident_id = { incident_id }'
- . ' AND p.patient_age = { patient_age }'
- . ' AND p.patient_sex = { patient_sex }'
- . ' AND p.patient_ethnicity = { patient_ethnicity }'
- . ' CREATE UNIQUE '
- . ' p<-[r:HAS_INCIDENT_PATIENT]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id
- , "patient_age" => $patient_age
- , "patient_sex" => $patient_sex
- , "patient_ethnicity" => $patient_ethnicity }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Degree Of Harm node
- $last_known_query =
- 'MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = { code }'
- . ' CREATE UNIQUE'
- . ' (doh:DEGREE_OF_HARM { degree_of_harm : { degree_of_harm }'
- . ' })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN doh';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "code" => "PD09"
- , "degree_of_harm" => $degree_of_harm }) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Degree Of Harm relationship
- $last_known_query =
- 'MATCH (i:INCIDENT), (doh:DEGREE_OF_HARM)'
- . ' WHERE i.incident_id = { incident_id }'
- . ' AND doh.degree_of_harm = { degree_of_harm }'
- . ' CREATE UNIQUE '
- . ' doh<-[r:HAS_INCIDENT_DEGREE_OF_HARM]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query
- , { "incident_id" => $incident_id
- , "degree_of_harm" => $degree_of_harm}) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- } catch {
- my $current_row_number = $row->row_number() + 1;
- say STDERR "Failure at row number $current_row_number in $input_file : $_.";
- say STDERR "Last known query $last_known_query."
- if (defined($last_known_query));
- if (defined($neo4p_query) and defined($neo4p_query->errstr)) {
- say STDERR "Last known neo4p query error: " , $neo4p_query->errstr;
- }
- foreach my $required_column (@required_columns) {
- if (defined($header_map{$required_column}) and
- defined($values[$header_map{$required_column}])) {
- say STDERR "Column $required_column : " , $values[$header_map{$required_column}];
- }
- }
- };
- unless ($row->row_number() % 500) {
- say "Processed " , $row->row_number() , " rows. " , scalar localtime;
- }
- }
- sub destroy_query {
- my $query = shift;
- if (defined($query)) {
- while (my $response = $query->fetch()) {
- # Who cares, throw it out
- }
- undef $query; # Destroy it
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment