Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl -w
- use strict;
- use 5.010;
- # Required for parsing NRLS data
- use Excel::Reader::XLSX;
- use DateTime::Format::Excel;
- # Required for inputing data into Neo4j
- use REST::Neo4p;
- use Data::Dumper;
- use Getopt::Long;
- use Try::Tiny;
- $| = 1;
- my ($input_file, $debug);
- my $neo_uri = 'http://localhost:7474';
- GetOptions (
- "input_file=s" => \$input_file,
- "neo_uri=s" => \$neo_uri,
- "debug:s" => \$debug,);
- if (!defined($input_file)) {
- die "Usage: $0 --input_file <xlsx file> [--neo_uri neo4j_uri] [--debug]";
- }
- # Define required column headers
- my @required_columns = (
- "IN07", # Incident description
- "PD09", # Indident degree of harm
- "RP07", # NHS Trust
- "IN05_lvl1", # Incident category
- "PD05_lvl1", # Speciality
- "PD02", # Patient sex
- "IN01", # Incident date
- "IN02_A_01", # Incident hour
- "Age_at_Incident", # Patient age
- "PD11", # Patient ethnicity
- "ST01_LVL1", # Recorder staff type
- "IN03_lvl1", # Incident location
- "incidentID", # Unique incident ID
- );
- my $reader = Excel::Reader::XLSX->new();
- my $workbook = $reader->read_file($input_file) or
- die ("Could not read $input_file.");
- my $worksheet = ($workbook->worksheets())[0] or
- die ("Failed to assign worksheet from workbook.");
- my $column_header_row = $worksheet->next_row() or
- die ("Could not get column header row.");
- my %header_map;
- # Find column positions for required columns
- while (my $cell = $column_header_row->next_cell()) {
- my $cell_value = $cell->value();
- if (grep /$cell_value/, @required_columns) {
- $header_map{$cell_value} = $cell->col();
- }
- }
- # Create connection to Neo4j
- say "Connecting to Neo4j : $neo_uri " , scalar localtime if defined($debug);
- REST::Neo4p->connect($neo_uri) or
- die ("Could not connect to $neo_uri.");
- say "Done connecting to Neo4j : $neo_uri " , scalar localtime if defined($debug);
- # Create queries
- say "Creating REST::Neo4p::Query objects " , scalar localtime if defined($debug);
- # Create NHS Trust
- my $create_nhs_trust_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- WHERE
- ndt.code = {code}
- CREATE UNIQUE
- (n:NHS_TRUST { name : {name} })-[:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN n.name
- EOCQL
- my $create_nhs_trust_query = REST::Neo4p::Query->new($create_nhs_trust_cql)
- or die ("Could not create query for : $create_nhs_trust_cql");
- # Create NHS Trust Location
- my $create_nhs_trust_location_cql =<<'EOCQL';
- MERGE (ntl:NHS_TRUST_LOCATION { location_level_01 : { location_level_01 } })
- RETURN ntl.location_level_01;
- EOCQL
- my $create_nhs_trust_location_query =
- REST::Neo4p::Query->new($create_nhs_trust_location_cql)
- or die ("Could not create query for : $create_nhs_trust_location_cql");
- # Create NHS Trust/NHS Trust Location relationship
- my $create_nhs_trust_location_rel_cql =<<'EOCQL';
- MATCH (nt:NHS_TRUST), (ntl:NHS_TRUST_LOCATION)
- USING INDEX nt:NHS_TRUST(name)
- USING INDEX ntl:NHS_TRUST_LOCATION(location_level_01)
- WHERE nt.name = { name }
- AND ntl.location_level_01 = { location_level_01 }
- CREATE UNIQUE
- (nt)-[hntl:HAS_NHS_TRUST_LOCATION]->(ntl) RETURN hntl
- EOCQL
- my $create_nhs_trust_location_rel_query =
- REST::Neo4p::Query->new($create_nhs_trust_location_rel_cql)
- or die ("Could not create query for : $create_nhs_trust_location_rel_cql");
- # Create Incident
- my $create_incident_cql =<<'EOCQL';
- MERGE (i:INCIDENT { incident_id : { incident_id } })
- ON CREATE SET i.incident_timestamp = { incident_timestamp },
- i.incident_description = { incident_description }
- RETURN i.incident_id
- EOCQL
- my $create_incident_query =
- REST::Neo4p::Query->new($create_incident_cql)
- or die ("Could not create query for : $create_incident_cql");
- # Create Incident/NRLS Data Type relationship
- my $create_incident_ndt_rel_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE), (i:INCIDENT)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- USING INDEX i:INCIDENT(incident_id)
- WHERE i.incident_id = { incident_id }
- AND ndt.code = { code }
- CREATE UNIQUE
- (i)-[hndt:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN hndt
- EOCQL
- my $create_incident_ndt_rel_query =
- REST::Neo4p::Query->new($create_incident_ndt_rel_cql)
- or die ("Could not create query for : $create_incident_ndt_rel_cql");
- # Create Incident/NHS Trust relationship
- my $create_incident_nhs_trust_rel_cql =<<'EOCQL';
- MATCH (n:NHS_TRUST), (i:INCIDENT)
- USING INDEX n:NHS_TRUST(name)
- USING INDEX i:INCIDENT(incident_id)
- WHERE n.name = { name }
- AND i.incident_id = { incident_id }
- CREATE UNIQUE
- (i)-[r:IS_NHS_TRUST_INCIDENT]->(n)
- RETURN r
- EOCQL
- my $create_incident_nhs_trust_rel_query =
- REST::Neo4p::Query->new($create_incident_nhs_trust_rel_cql)
- or die ("Could not create query for : $create_incident_nhs_trust_rel_cql");
- # Create Incident/NHS Trust Location relationship
- my $create_incident_ntl_rel_cql =<<'EOCQL';
- MATCH (ntl:NHS_TRUST_LOCATION), (i:INCIDENT)
- USING INDEX ntl:NHS_TRUST_LOCATION(location_level_01)
- USING INDEX i:INCIDENT(incident_id)
- WHERE ntl.location_level_01 = { location_level_01 }
- AND i.incident_id = { incident_id }
- CREATE UNIQUE
- (i)-[r:IS_NHS_TRUST_LOCATION_INCIDENT]->(ntl)
- RETURN r
- EOCQL
- my $create_incident_ntl_rel_query =
- REST::Neo4p::Query->new($create_incident_ntl_rel_cql)
- or die ("Could not create query for : $create_incident_ntl_rel_cql");
- # Create Incident Category
- my $create_incident_category_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (ic:INCIDENT_CATEGORY { category_level_01 : { category_level_01 }
- })-[:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN ic.category_level_01
- EOCQL
- my $create_incident_category_query =
- REST::Neo4p::Query->new($create_incident_category_cql)
- or die ("Could not create query for : $create_incident_category_cql");
- # Create Incident/Incident Category relationship
- my $create_incident_category_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (ic:INCIDENT_CATEGORY)
- USING INDEX ic:INCIDENT_CATEGORY(category_level_01)
- USING INDEX i:INCIDENT(incident_id)
- WHERE i.incident_id = { incident_id }
- AND ic.category_level_01 = { category_level_01 }
- CREATE UNIQUE
- (i)-[r:HAS_INCIDENT_CATEGORY]->(ic)
- RETURN r
- EOCQL
- my $create_incident_category_rel_query =
- REST::Neo4p::Query->new($create_incident_category_rel_cql)
- or die ("Could not create query for : $create_incident_category_rel_cql");
- # Create Incident Speciality
- my $create_incident_speciality_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (is:INCIDENT_SPECIALITY { speciality_level_01 : { speciality_level_01 }
- })-[:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN is.speciality_level_01
- EOCQL
- my $create_incident_speciality_query =
- REST::Neo4p::Query->new($create_incident_speciality_cql)
- or die ("Could not create query for : $create_incident_speciality_cql");
- # Create Incident/Incident Speciality relationship
- my $create_incident_speciality_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (is:INCIDENT_SPECIALITY)
- USING INDEX is:INCIDENT_SPECIALITY(speciality_level_01)
- USING INDEX i:INCIDENT(incident_id)
- WHERE i.incident_id = { incident_id }
- AND is.speciality_level_01 = { speciality_level_01 }
- CREATE UNIQUE
- (i)-[r:HAS_INCIDENT_SPECIALITY]->(is)
- RETURN r
- EOCQL
- my $create_incident_speciality_rel_query =
- REST::Neo4p::Query->new($create_incident_speciality_rel_cql)
- or die ("Could not create query for : $create_incident_speciality_rel_cql");
- # Create Incident Reporter
- my $create_incident_reporter_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (ir:INCIDENT_REPORTER { reporter_level_01 : { reporter_level_01 }
- })-[:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN ir.reporter_level_01
- EOCQL
- my $create_incident_reporter_query =
- REST::Neo4p::Query->new($create_incident_reporter_cql)
- or die ("Could not create query for : $create_incident_reporter_cql");
- # Create Incident/Incident Reporter relationship
- my $create_incident_reporter_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (ir:INCIDENT_REPORTER)
- USING INDEX ir:INCIDENT_REPORTER(reporter_level_01)
- USING INDEX i:INCIDENT(incident_id)
- WHERE i.incident_id = { incident_id }
- AND ir.reporter_level_01 = { reporter_level_01 }
- CREATE UNIQUE
- (i)-[r:HAS_INCIDENT_REPORTER]->(ir)
- RETURN r
- EOCQL
- my $create_incident_reporter_rel_query =
- REST::Neo4p::Query->new($create_incident_reporter_rel_cql)
- or die ("Could not create query for : $create_incident_reporter_rel_cql");
- # Create Incident Patient
- my $create_incident_patient_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (p:PATIENT {
- patient_age : { patient_age },
- patient_sex : { patient_sex },
- patient_ethnicity : { patient_ethnicity }
- })-[:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN p.patient_age
- EOCQL
- my $create_incident_patient_query =
- REST::Neo4p::Query->new($create_incident_patient_cql)
- or die ("Could not create query for : $create_incident_patient_cql");
- # Create Incident/Incident Patient relationship
- my $create_incident_patient_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (p:PATIENT)
- WHERE i.incident_id = { incident_id }
- AND p.patient_age = { patient_age }
- AND p.patient_sex = { patient_sex }
- AND p.patient_ethnicity = { patient_ethnicity }
- CREATE UNIQUE
- (i)-[r:HAS_INCIDENT_PATIENT]->(p)
- RETURN r
- EOCQL
- my $create_incident_patient_rel_query =
- REST::Neo4p::Query->new($create_incident_patient_rel_cql)
- or die ("Could not create query for : $create_incident_patient_rel_cql");
- # Create Degree Of Harm
- my $create_incident_doh_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- USING INDEX ndt:NRLS_DATA_TYPE(code)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (doh:DEGREE_OF_HARM { degree_of_harm : { degree_of_harm }
- })-[:HAS_NRLS_DATA_TYPE]->(ndt) RETURN doh.degree_of_harm
- EOCQL
- my $create_incident_doh_query =
- REST::Neo4p::Query->new($create_incident_doh_cql)
- or die ("Could not create query for : $create_incident_doh_cql");
- # Create Incident/Degree Of Harm relationship
- my $create_incident_doh_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (doh:DEGREE_OF_HARM)
- USING INDEX doh:DEGREE_OF_HARM(degree_of_harm)
- USING INDEX i:INCIDENT(incident_id)
- WHERE i.incident_id = { incident_id }
- AND doh.degree_of_harm = { degree_of_harm }
- CREATE UNIQUE
- (i)-[r:HAS_INCIDENT_DEGREE_OF_HARM]->(doh) RETURN r
- EOCQL
- my $create_incident_doh_rel_query =
- REST::Neo4p::Query->new($create_incident_doh_rel_cql)
- or die ("Could not create query for : $create_incident_doh_rel_cql");
- say "Done creating REST::Neo4p::Query objects " , scalar localtime if defined($debug);
- # Place holders
- my (%nodes, %relationships, %created_nhs_trust_location);
- my $created_nhs_trust_node;
- say "Beginning processing loop " , scalar localtime if defined($debug);
- my $track_count = defined($debug) ? 100 : 500;
- while (my $row = $worksheet->next_row) {
- my @values = $row->values();
- if (@values == 0) {
- warn ("Row at ", $row->row_number() , " contains no values.");
- }
- @values = map {
- if ($_ =~ /^ ?$/ or !defined($_)) {
- $_ = "NOT_DEFINED";
- } else {
- $_;
- }
- } @values;
- my ($nhs_trust, $nhs_trust_location, $degree_of_harm,
- $patient_age, $patient_sex, $patient_ethnicity,
- $incident_id, $incident_date,
- $incident_hour, $incident_description,
- $incident_category, $incident_reporter,
- $incident_speciality);
- my ($last_known_query, $neo4p_query, $need_rollback);
- try {
- $nhs_trust = $values[$header_map{"RP07"}];
- $nhs_trust_location = $values[$header_map{"IN03_lvl1"}];
- $degree_of_harm = $values[$header_map{"PD09"}];
- $patient_age = $values[$header_map{"Age_at_Incident"}];
- $patient_sex = $values[$header_map{"PD02"}];
- $patient_ethnicity = $values[$header_map{"PD11"}];
- $incident_id = $values[$header_map{"incidentID"}];
- $incident_date = $values[$header_map{"IN01"}];
- $incident_hour = $values[$header_map{"IN02_A_01"}];
- $incident_description = $values[$header_map{"IN07"}];
- $incident_category = $values[$header_map{"IN05_lvl1"}];
- $incident_reporter = $values[$header_map{"ST01_LVL1"}];
- $incident_speciality = $values[$header_map{"PD05_lvl1"}];
- # Normalize double quotes in incident description
- $incident_description =~ s/"/\\"/g;
- # Assign unique trust id just to ensure uniqueness on set
- my $trust_incident_id = $incident_id . "_" . $nhs_trust;
- # Normalize incident hour
- if ($incident_hour eq 'NOT_DEFINED') {
- $incident_hour = '00';
- } else {
- $incident_hour = substr($incident_hour, 0, 2);
- }
- # Normalize date
- my $incident_timestamp = DateTime::Format::Excel
- ->parse_datetime($incident_date)->ymd . "T" .
- $incident_hour . ":00:00";
- # Begin transaction
- REST::Neo4p->begin_work;
- $need_rollback = 1;
- # Build nodes
- # Create nodes
- # Create unique NHS Trust node
- if (!defined($created_nhs_trust_node)) {
- $created_nhs_trust_node = 1;
- ($neo4p_query, $last_known_query) =
- ($create_nhs_trust_query, $create_nhs_trust_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "RP07", "name" => $nhs_trust });
- }
- if (!defined($created_nhs_trust_location{$nhs_trust_location})) {
- # Create unique NHS Trust Location node
- ($neo4p_query, $last_known_query) =
- ($create_nhs_trust_location_query
- , $create_nhs_trust_location_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "location_level_01" => $nhs_trust_location });
- # Create relationship between NHS Trust/Location
- ($neo4p_query, $last_known_query) =
- ($create_nhs_trust_location_rel_query
- , $create_nhs_trust_location_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- {"name" => $nhs_trust, "location_level_01" => $nhs_trust_location });
- $created_nhs_trust_location{$nhs_trust_location} = 1;
- }
- # Create unique Incident node
- ($neo4p_query, $last_known_query) =
- ($create_incident_query, $create_incident_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "incident_timestamp" => $incident_timestamp
- , "incident_description" => $incident_description });
- # Create unique Incident node NRLS_DATA_TYPE relationship ('INO7')
- ($neo4p_query, $last_known_query) =
- ($create_incident_ndt_rel_query, $create_incident_ndt_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id, "code" => "IN07" });
- # Create relationship to NHS Trust ${nhs_trust} ${incident_id}
- ($neo4p_query, $last_known_query) =
- ($create_incident_nhs_trust_rel_query
- , $create_incident_nhs_trust_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "name" => $nhs_trust, "incident_id" => $incident_id });
- # Create relationship to NHS Trust Location
- ($neo4p_query, $last_known_query) =
- ($create_incident_ntl_rel_query, $create_incident_ntl_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "location_level_01" => $nhs_trust_location
- , "incident_id" => $incident_id });
- # Create unique Incident Category node
- ($neo4p_query, $last_known_query) =
- ($create_incident_category_query, $create_incident_category_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "IN05_lvl1"
- , "category_level_01" => $incident_category });
- # Create unique Incident Category relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_category_rel_query
- , $create_incident_category_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "category_level_01" => $incident_category });
- # Create unique Incident Speciality node
- ($neo4p_query, $last_known_query) =
- ($create_incident_speciality_query, $create_incident_speciality_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "PD05_lvl1"
- , "speciality_level_01" => $incident_speciality });
- # Create unique Incident Speciality relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_speciality_rel_query
- , $create_incident_speciality_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "speciality_level_01" => $incident_speciality });
- # Create unique Incident Reporter node
- ($neo4p_query, $last_known_query) =
- ($create_incident_reporter_query, $create_incident_reporter_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "ST01_LVL1"
- , "reporter_level_01" => $incident_reporter });
- # Create unique Incident Reporter relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_reporter_rel_query
- , $create_incident_reporter_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "reporter_level_01" => $incident_reporter });
- # Create unique Patient node
- ($neo4p_query, $last_known_query) =
- ($create_incident_patient_query
- , $create_incident_patient_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "Age_at_Incident"
- , "patient_age" => $patient_age
- , "patient_sex" => $patient_sex
- , "patient_ethnicity" => $patient_ethnicity});
- # Create unique Incident Patient relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_patient_rel_query
- , $create_incident_patient_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "patient_age" => $patient_age
- , "patient_sex" => $patient_sex
- , "patient_ethnicity" => $patient_ethnicity });
- # Create unique Degree Of Harm node
- ($neo4p_query, $last_known_query) =
- ($create_incident_doh_query
- , $create_incident_doh_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "PD09"
- , "degree_of_harm" => $degree_of_harm });
- # Create unique Degree Of Harm relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_doh_rel_query
- , $create_incident_doh_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "degree_of_harm" => $degree_of_harm});
- REST::Neo4p->commit
- or die("Could not commit transaction " . $neo4p_query->errstr);
- my $results = REST::Neo4p->_tx_results; # Flush results
- } catch {
- my $current_row_number = $row->row_number() + 1;
- say STDERR "Failure at row number $current_row_number in $input_file : $_.";
- say STDERR "Last known query $last_known_query."
- if (defined($last_known_query));
- if (defined($neo4p_query) and defined($neo4p_query->errstr)) {
- say STDERR "Last known neo4p query error: " , $neo4p_query->errstr;
- }
- foreach my $required_column (@required_columns) {
- if (defined($header_map{$required_column}) and
- defined($values[$header_map{$required_column}])) {
- say STDERR "Column $required_column : " , $values[$header_map{$required_column}];
- }
- }
- REST::Neo4p->rollback if defined($need_rollback);
- };
- unless ($row->row_number() % $track_count) {
- say "Processed " , $row->row_number() , " rows. " , scalar localtime;
- }
- }
- say "Completed processing loop " , scalar localtime if defined($debug);
- sub run_query {
- my ($cql, $query, $params) = @_;
- if (defined($debug) && $debug eq '10') {
- my $cql_with_params = $cql;
- my %params = %{$params};
- foreach my $key (keys(%params)) {
- $cql_with_params =~ s/\{ $key \}/'$params{$key}'/;
- }
- say "Attempting to execute statement $cql_with_params "
- , scalar localtime;
- }
- $query->execute($params)
- or die("Could not execute query $cql");
- my $response = $query->fetch
- or die("Could not fetch response for $cql");
- }
Advertisement
Add Comment
Please, Sign In to add comment