Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl -w
- use strict;
- use 5.010;
- # Required for parsing NRLS data
- use Excel::Reader::XLSX;
- use DateTime::Format::Excel;
- # Required for inputing data into Neo4j
- use REST::Neo4p;
- use Data::Dumper;
- use Getopt::Long;
- use Try::Tiny;
- my $input_file;
- my $neo_uri = 'http://larva.med.ic.ac.uk:7474';
- GetOptions (
- "input_file=s" => \$input_file,
- "neo_uri=s" => \$neo_uri,);
- if (!defined($input_file)) {
- die "Usage: $0 --input_file <xlsx file>";
- }
- # Define required column headers
- my @required_columns = (
- "IN07", # Incident description
- "PD09", # Indident degree of harm
- "RP07", # NHS Trust
- "IN05_lvl1", # Incident category
- "PD05_lvl1", # Speciality
- "PD02", # Patient sex
- "IN01", # Incident date
- "IN02_A_01", # Incident hour
- "Age_at_Incident", # Patient age
- "PD11", # Patient ethnicity
- "ST01_LVL1", # Recorder staff type
- "IN03_lvl1", # Incident location
- "incidentID", # Unique incident ID
- );
- my $reader = Excel::Reader::XLSX->new();
- my $workbook = $reader->read_file($input_file) or
- die ("Could not read $input_file.");
- my $worksheet = ($workbook->worksheets())[0] or
- die ("Failed to assign worksheet from workbook.");
- my $column_header_row = $worksheet->next_row() or
- die ("Could not get column header row.");
- my %header_map;
- # Find column positions for required columns
- while (my $cell = $column_header_row->next_cell()) {
- my $cell_value = $cell->value();
- if (grep /$cell_value/, @required_columns) {
- $header_map{$cell_value} = $cell->col();
- }
- }
- # Create connection to Neo4j
- REST::Neo4p->connect($neo_uri) or
- die ("Could not connect to $neo_uri.");
- my (%nodes, %relationships);
- my $created_nhs_trust_node;
- while (my $row = $worksheet->next_row) {
- my @values = $row->values();
- if (@values == 0) {
- warn ("Row at ", $row->row_number() , " contains no values.");
- }
- @values = map {
- if ($_ =~ /^ ?$/ or !defined($_)) {
- $_ = "NOT_DEFINED";
- } else {
- $_;
- }
- } @values;
- my ($nhs_trust, $nhs_trust_location, $degree_of_harm,
- $patient_age, $patient_sex, $patient_ethnicity,
- $incident_id, $incident_date,
- $incident_hour, $incident_description,
- $incident_category, $incident_reporter,
- $incident_speciality);
- my ($last_known_query, $neo4p_query);
- try {
- $nhs_trust = $values[$header_map{"RP07"}];
- $nhs_trust_location = $values[$header_map{"IN03_lvl1"}];
- $degree_of_harm = $values[$header_map{"PD09"}];
- $patient_age = $values[$header_map{"Age_at_Incident"}];
- $patient_sex = $values[$header_map{"PD02"}];
- $patient_ethnicity = $values[$header_map{"PD11"}];
- $incident_id = $values[$header_map{"incidentID"}];
- $incident_date = $values[$header_map{"IN01"}];
- $incident_hour = $values[$header_map{"IN02_A_01"}];
- $incident_description = $values[$header_map{"IN07"}];
- $incident_category = $values[$header_map{"IN05_lvl1"}];
- $incident_reporter = $values[$header_map{"ST01_LVL1"}];
- $incident_speciality = $values[$header_map{"PD05_lvl1"}];
- # Normalize double quotes in incident description
- $incident_description =~ s/\\//g;
- $incident_description =~ s/"/\\"/g;
- # Assign unique trust id just to ensure uniqueness on set
- my $trust_incident_id = $incident_id . "_" . $nhs_trust;
- # Normalize incident hour
- if ($incident_hour eq 'NOT_DEFINED') {
- $incident_hour = '00';
- } else {
- $incident_hour = substr($incident_hour, 0, 2);
- }
- # Normalize date
- my $incident_timestamp = DateTime::Format::Excel
- ->parse_datetime($incident_date)->ymd . "T" .
- $incident_hour . ":00:00";
- # Build nodes
- # Create nodes
- # Create unique NHS Trust node
- if (!defined($created_nhs_trust_node)) {
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'RP07' CREATE UNIQUE "
- . '(n:NHS_TRUST { name : "' . $nhs_trust . '" })-[:HAS_NRLS_DATA_TYPE]->ndt '
- . 'RETURN n';
- $created_nhs_trust_node = 1;
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- }
- # Create unique NHS Trust Location node
- $last_known_query =
- 'MATCH nt:NHS_TRUST WHERE nt.name = '
- . "'${nhs_trust}'" . ' CREATE UNIQUE '
- . '(ntl:NHS_TRUST_LOCATION { location_level_01 : "' . $nhs_trust_location
- . '" })<-[:HAS_NHS_TRUST_LOCATION]-nt RETURN ntl';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident node
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'IN07' CREATE UNIQUE "
- . '(i:INCIDENT { incident_id : "' . $incident_id . '", '
- . 'incident_timestamp : "' . $incident_timestamp . '", '
- . 'incident_description : "' . $incident_description
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN i';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create relationship to NHS Trust
- $last_known_query =
- 'MATCH n:NHS_TRUST, i:INCIDENT '
- . 'WHERE n.name = '
- . "'${nhs_trust}'"
- . ' AND i.incident_id = '
- . "'${incident_id}'" . ' CREATE UNIQUE '
- . 'i-[r:IS_NHS_TRUST_INCIDENT]->n '
- . 'RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- undef ($neo4p_query);
- # Create relationship to NHS Trust Location
- $last_known_query =
- 'MATCH ntl:NHS_TRUST_LOCATION, i:INCIDENT '
- . 'WHERE ntl.location_level_01 = '
- . "'${nhs_trust_location}'"
- . ' AND i.incident_id = '
- . "'${incident_id}'" . ' CREATE UNIQUE '
- . 'i-[r:IS_NHS_TRUST_LOCATION_INCIDENT]->ntl '
- . 'RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Category node
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'IN05_lvl1' CREATE UNIQUE "
- . '(ic:INCIDENT_CATEGORY { category_level_01 : "'
- . $incident_category
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ic';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Category relationship
- $last_known_query =
- 'MATCH i:INCIDENT, ic:INCIDENT_CATEGORY '
- . 'WHERE i.incident_id = '
- . "'${incident_id}'" . ' '
- . 'AND ic.category_level_01 = '
- . "'${incident_category}'"
- . ' CREATE UNIQUE '
- . 'ic<-[r:HAS_INCIDENT_CATEGORY]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Speciality node
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'PD05_lvl1' CREATE UNIQUE "
- . '(is:INCIDENT_SPECIALITY { speciality_level_01 : "'
- . $incident_speciality
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN is';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Speciality relationship
- $last_known_query =
- 'MATCH i:INCIDENT, is:INCIDENT_SPECIALITY '
- . 'WHERE i.incident_id = '
- . "'${incident_id}'" . ' '
- . 'AND is.speciality_level_01 = '
- . "'${incident_speciality}'"
- . ' CREATE UNIQUE '
- . 'is<-[r:HAS_INCIDENT_SPECIALITY]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Reporter node
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'ST01_LVL1' CREATE UNIQUE "
- . '(ir:INCIDENT_REPORTER { reporter_level_01 : "'
- . $incident_reporter
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN ir';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Incident Reporter relationship
- $last_known_query =
- 'MATCH i:INCIDENT, ir:INCIDENT_REPORTER '
- . 'WHERE i.incident_id = '
- . "'${incident_id}'" . ' '
- . 'AND ir.reporter_level_01 = '
- . "'${incident_reporter}'"
- . ' CREATE UNIQUE '
- . 'ir<-[r:HAS_INCIDENT_REPORTER]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Patient node
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'Age_at_Incident' CREATE UNIQUE "
- . '(p:PATIENT { patient_age : "'
- . $patient_age
- . '", patient_sex : "'
- . $patient_sex
- . '", patient_ethnicity : "'
- . $patient_ethnicity
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN p';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Patient relationship
- $last_known_query =
- 'MATCH i:INCIDENT, p:PATIENT '
- . 'WHERE i.incident_id = '
- . "'${incident_id}'" . ' '
- . 'AND p.patient_age = '
- . "'${patient_age}'" . ' '
- . 'AND p.patient_sex = '
- . "'${patient_sex}'" . ' '
- . 'AND p.patient_ethnicity = '
- . "'${patient_ethnicity}'"
- . ' CREATE UNIQUE '
- . ' p<-[r:HAS_INCIDENT_PATIENT]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Degree Of Harm node
- $last_known_query =
- 'MATCH ndt:NRLS_DATA_TYPE WHERE ndt.code = '
- . "'PD09' CREATE UNIQUE "
- . '(doh:DEGREE_OF_HARM { degree_of_harm : "'
- . $degree_of_harm
- . '" })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN doh';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- # Create unique Degree Of Harm relationship
- $last_known_query =
- 'MATCH i:INCIDENT, doh:DEGREE_OF_HARM '
- . 'WHERE i.incident_id = '
- . "'${incident_id}'" . ' '
- . 'AND doh.degree_of_harm = '
- . "'${degree_of_harm}'"
- . ' CREATE UNIQUE '
- . ' doh<-[r:HAS_INCIDENT_DEGREE_OF_HARM]-i RETURN r';
- $neo4p_query = REST::Neo4p::Query->new($last_known_query) or
- die ("Could not create neo4p query from $last_known_query.");
- $neo4p_query->execute() or die ("Could not execute query $last_known_query.");
- destroy_query ($neo4p_query);
- } catch {
- my $current_row_number = $row->row_number() + 1;
- say STDERR "Failure at row number $current_row_number in $input_file : $_.";
- say STDERR "Last known query $last_known_query."
- if (defined($last_known_query));
- if (defined($neo4p_query) and defined($neo4p_query->errstr)) {
- say STDERR "Last known neo4p query error: " , $neo4p_query->errstr;
- }
- foreach my $required_column (@required_columns) {
- if (defined($header_map{$required_column}) and
- defined($values[$header_map{$required_column}])) {
- say STDERR "Column $required_column : " , $values[$header_map{$required_column}];
- }
- }
- };
- unless ($row->row_number() % 500) {
- say "Processed " , $row->row_number() , " rows @ " , scalar localtime;
- }
- }
- sub destroy_query {
- my $query = shift;
- if (defined($query)) {
- while (my $response = $query->fetch()) {
- # Who cares, throw it out
- }
- undef $query; # Destroy it
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment