Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl -w
- use strict;
- use 5.010;
- # Required for parsing NRLS data
- use Excel::Reader::XLSX;
- use DateTime::Format::Excel;
- # Required for inputing data into Neo4j
- use REST::Neo4p;
- use Data::Dumper;
- use Getopt::Long;
- use Try::Tiny;
- $| = 1;
- my $input_file;
- my $neo_uri = 'http://larva.med.ic.ac.uk:7474';
- GetOptions (
- "input_file=s" => \$input_file,
- "neo_uri=s" => \$neo_uri,);
- if (!defined($input_file)) {
- die "Usage: $0 --input_file <xlsx file>";
- }
- # Define required column headers
- my @required_columns = (
- "IN07", # Incident description
- "PD09", # Indident degree of harm
- "RP07", # NHS Trust
- "IN05_lvl1", # Incident category
- "PD05_lvl1", # Speciality
- "PD02", # Patient sex
- "IN01", # Incident date
- "IN02_A_01", # Incident hour
- "Age_at_Incident", # Patient age
- "PD11", # Patient ethnicity
- "ST01_LVL1", # Recorder staff type
- "IN03_lvl1", # Incident location
- "incidentID", # Unique incident ID
- );
- my $reader = Excel::Reader::XLSX->new();
- my $workbook = $reader->read_file($input_file) or
- die ("Could not read $input_file.");
- my $worksheet = ($workbook->worksheets())[0] or
- die ("Failed to assign worksheet from workbook.");
- my $column_header_row = $worksheet->next_row() or
- die ("Could not get column header row.");
- my %header_map;
- # Find column positions for required columns
- while (my $cell = $column_header_row->next_cell()) {
- my $cell_value = $cell->value();
- if (grep /$cell_value/, @required_columns) {
- $header_map{$cell_value} = $cell->col();
- }
- }
- # Create connection to Neo4j
- REST::Neo4p->connect($neo_uri) or
- die ("Could not connect to $neo_uri.");
- # Create queries
- # Create NHS Trust
- my $create_nhs_trust_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- WHERE
- ndt.code = {code}
- CREATE UNIQUE
- (n:NHS_TRUST { name : {name} })-[:HAS_NRLS_DATA_TYPE]->(ndt)
- RETURN n
- EOCQL
- my $create_nhs_trust_query = REST::Neo4p::Query->new($create_nhs_trust_cql)
- or die ("Could not create query for : $create_nhs_trust_cql");
- # Create NHS Trust Location
- my $create_nhs_trust_location_cql =<<'EOCQL';
- MERGE (ntl:NHS_TRUST_LOCATION{ location_level_01 : { location_level_01 } })
- RETURN ntl;
- EOCQL
- my $create_nhs_trust_location_query =
- REST::Neo4p::Query->new($create_nhs_trust_location_cql)
- or die ("Could not create query for : $create_nhs_trust_location_cql");
- # Create NHS Trust/NHS Trust Location relationship
- my $create_nhs_trust_location_rel_cql =<<'EOCQL';
- MATCH (nt:NHS_TRUST), (ntl:NHS_TRUST_LOCATION)
- WHERE nt.name = { name }
- AND ntl.location_level_01 = { location_level_01 }
- CREATE UNIQUE
- (ntl)<-[hntl:HAS_NHS_TRUST_LOCATION]-nt RETURN hntl
- EOCQL
- my $create_nhs_trust_location_rel_query =
- REST::Neo4p::Query->new($create_nhs_trust_location_rel_cql)
- or die ("Could not create query for : $create_nhs_trust_location_rel_cql");
- # Create Incident
- my $create_incident_cql =<<'EOCQL';
- MERGE (i:INCIDENT { incident_id : { incident_id } })
- ON CREATE SET i.incident_timestamp = { incident_timestamp },
- i.incident_description = { incident_description }
- RETURN i
- EOCQL
- my $create_incident_query =
- REST::Neo4p::Query->new($create_incident_cql)
- or die ("Could not create query for : $create_incident_cql");
- # Create Incident/NRLS Data Type relationship
- my $create_incident_ndt_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (ndt:NRLS_DATA_TYPE)
- WHERE i.incident_id = { incident_id }
- AND ndt.code = { code }
- CREATE UNIQUE
- (i)-[hndt:HAS_NRLS_DATA_TYPE]->ndt
- RETURN hndt
- EOCQL
- my $create_incident_ndt_rel_query =
- REST::Neo4p::Query->new($create_incident_ndt_rel_cql)
- or die ("Could not create query for : $create_incident_ndt_rel_cql");
- # Create Incident/NHS Trust relationship
- my $create_incident_nhs_trust_rel_cql =<<'EOCQL';
- MATCH (n:NHS_TRUST), (i:INCIDENT)
- WHERE n.name = { name }
- AND i.incident_id = { incident_id }
- CREATE UNIQUE
- i-[r:IS_NHS_TRUST_INCIDENT]->n
- RETURN r
- EOCQL
- my $create_incident_nhs_trust_rel_query =
- REST::Neo4p::Query->new($create_incident_nhs_trust_rel_cql)
- or die ("Could not create query for : $create_incident_nhs_trust_rel_cql");
- # Create Incident/NHS Trust Location relationship
- my $create_incident_ntl_rel_cql =<<'EOCQL';
- MATCH (ntl:NHS_TRUST_LOCATION), (i:INCIDENT)
- WHERE ntl.location_level_01 = { location_level_01 }
- AND i.incident_id = { incident_id }
- CREATE UNIQUE
- i-[r:IS_NHS_TRUST_LOCATION_INCIDENT]->ntl
- RETURN r
- EOCQL
- my $create_incident_ntl_rel_query =
- REST::Neo4p::Query->new($create_incident_ntl_rel_cql)
- or die ("Could not create query for : $create_incident_ntl_rel_cql");
- # Create Incident Category
- my $create_incident_category_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (ic:INCIDENT_CATEGORY { category_level_01 : { category_level_01 }
- })-[:HAS_NRLS_DATA_TYPE]->ndt
- RETURN ic
- EOCQL
- my $create_incident_category_query =
- REST::Neo4p::Query->new($create_incident_category_cql)
- or die ("Could not create query for : $create_incident_category_cql");
- # Create Incident/Incident Category relationship
- my $create_incident_category_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (ic:INCIDENT_CATEGORY)
- WHERE i.incident_id = { incident_id }
- AND ic.category_level_01 = { category_level_01 }
- CREATE UNIQUE
- ic<-[r:HAS_INCIDENT_CATEGORY]-i
- RETURN r
- EOCQL
- my $create_incident_category_rel_query =
- REST::Neo4p::Query->new($create_incident_category_rel_cql)
- or die ("Could not create query for : $create_incident_category_rel_cql");
- # Create Incident Speciality
- my $create_incident_speciality_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (is:INCIDENT_SPECIALITY { speciality_level_01 : { speciality_level_01 }
- })-[:HAS_NRLS_DATA_TYPE]->ndt
- RETURN is
- EOCQL
- my $create_incident_speciality_query =
- REST::Neo4p::Query->new($create_incident_speciality_cql)
- or die ("Could not create query for : $create_incident_speciality_cql");
- # Create Incident/Incident Speciality relationship
- my $create_incident_speciality_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (is:INCIDENT_SPECIALITY)
- WHERE i.incident_id = { incident_id }
- AND is.speciality_level_01 = { speciality_level_01 }
- CREATE UNIQUE
- is<-[r:HAS_INCIDENT_SPECIALITY]-i
- RETURN r
- EOCQL
- my $create_incident_speciality_rel_query =
- REST::Neo4p::Query->new($create_incident_speciality_rel_cql)
- or die ("Could not create query for : $create_incident_speciality_rel_cql");
- # Create Incident Reporter
- my $create_incident_reporter_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (ir:INCIDENT_REPORTER { reporter_level_01 : { reporter_level_01 }
- })-[:HAS_NRLS_DATA_TYPE]->ndt
- RETURN ir
- EOCQL
- my $create_incident_reporter_query =
- REST::Neo4p::Query->new($create_incident_reporter_cql)
- or die ("Could not create query for : $create_incident_reporter_cql");
- # Create Incident/Incident Reporter relationship
- my $create_incident_reporter_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (ir:INCIDENT_REPORTER)
- WHERE i.incident_id = { incident_id }
- AND ir.reporter_level_01 = { reporter_level_01 }
- CREATE UNIQUE
- ir<-[r:HAS_INCIDENT_REPORTER]-i
- RETURN r
- EOCQL
- my $create_incident_reporter_rel_query =
- REST::Neo4p::Query->new($create_incident_reporter_rel_cql)
- or die ("Could not create query for : $create_incident_reporter_rel_cql");
- # Create Incident Patient
- my $create_incident_patient_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE)
- WHERE ndt.code = { code }
- CREATE UNIQUE
- (p:PATIENT {
- patient_age : { patient_age },
- patient_sex : { patient_sex },
- patient_ethnicity : { patient_ethnicity }
- })-[:HAS_NRLS_DATA_TYPE]->ndt
- RETURN p
- EOCQL
- my $create_incident_patient_query =
- REST::Neo4p::Query->new($create_incident_patient_cql)
- or die ("Could not create query for : $create_incident_patient_cql");
- # Create Incident/Incident Patient relationship
- my $create_incident_patient_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (p:PATIENT)
- WHERE i.incident_id = { incident_id }
- AND p.patient_age = { patient_age }
- AND p.patient_sex = { patient_sex }
- AND p.patient_ethnicity = { patient_ethnicity }
- CREATE UNIQUE
- p<-[r:HAS_INCIDENT_PATIENT]-i
- RETURN r
- EOCQL
- my $create_incident_patient_rel_query =
- REST::Neo4p::Query->new($create_incident_patient_rel_cql)
- or die ("Could not create query for : $create_incident_patient_rel_cql");
- # Create Degree Of Harm
- my $create_incident_doh_cql =<<'EOCQL';
- MATCH (ndt:NRLS_DATA_TYPE) WHERE ndt.code = { code }
- CREATE UNIQUE
- (doh:DEGREE_OF_HARM { degree_of_harm : { degree_of_harm }
- })-[:HAS_NRLS_DATA_TYPE]->ndt RETURN doh
- EOCQL
- my $create_incident_doh_query =
- REST::Neo4p::Query->new($create_incident_doh_cql)
- or die ("Could not create query for : $create_incident_doh_cql");
- # Create Incident/Degree Of Harm relationship
- my $create_incident_doh_rel_cql =<<'EOCQL';
- MATCH (i:INCIDENT), (doh:DEGREE_OF_HARM)
- WHERE i.incident_id = { incident_id }
- AND doh.degree_of_harm = { degree_of_harm }
- CREATE UNIQUE
- doh<-[r:HAS_INCIDENT_DEGREE_OF_HARM]-i RETURN r
- EOCQL
- my $create_incident_doh_rel_query =
- REST::Neo4p::Query->new($create_incident_doh_rel_cql)
- or die ("Could not create query for : $create_incident_doh_rel_cql");
- # Place holders
- my (%nodes, %relationships, %created_nhs_trust_location);
- my $created_nhs_trust_node;
- while (my $row = $worksheet->next_row) {
- my @values = $row->values();
- if (@values == 0) {
- warn ("Row at ", $row->row_number() , " contains no values.");
- }
- @values = map {
- if ($_ =~ /^ ?$/ or !defined($_)) {
- $_ = "NOT_DEFINED";
- } else {
- $_;
- }
- } @values;
- my ($nhs_trust, $nhs_trust_location, $degree_of_harm,
- $patient_age, $patient_sex, $patient_ethnicity,
- $incident_id, $incident_date,
- $incident_hour, $incident_description,
- $incident_category, $incident_reporter,
- $incident_speciality);
- my ($last_known_query, $neo4p_query);
- try {
- $nhs_trust = $values[$header_map{"RP07"}];
- $nhs_trust_location = $values[$header_map{"IN03_lvl1"}];
- $degree_of_harm = $values[$header_map{"PD09"}];
- $patient_age = $values[$header_map{"Age_at_Incident"}];
- $patient_sex = $values[$header_map{"PD02"}];
- $patient_ethnicity = $values[$header_map{"PD11"}];
- $incident_id = $values[$header_map{"incidentID"}];
- $incident_date = $values[$header_map{"IN01"}];
- $incident_hour = $values[$header_map{"IN02_A_01"}];
- $incident_description = $values[$header_map{"IN07"}];
- $incident_category = $values[$header_map{"IN05_lvl1"}];
- $incident_reporter = $values[$header_map{"ST01_LVL1"}];
- $incident_speciality = $values[$header_map{"PD05_lvl1"}];
- # Normalize double quotes in incident description
- $incident_description =~ s/"/\\"/g;
- # Assign unique trust id just to ensure uniqueness on set
- my $trust_incident_id = $incident_id . "_" . $nhs_trust;
- # Normalize incident hour
- if ($incident_hour eq 'NOT_DEFINED') {
- $incident_hour = '00';
- } else {
- $incident_hour = substr($incident_hour, 0, 2);
- }
- # Normalize date
- my $incident_timestamp = DateTime::Format::Excel
- ->parse_datetime($incident_date)->ymd . "T" .
- $incident_hour . ":00:00";
- # Build nodes
- # Create nodes
- # Create unique NHS Trust node
- if (!defined($created_nhs_trust_node)) {
- $created_nhs_trust_node = 1;
- ($neo4p_query, $last_known_query) =
- ($create_nhs_trust_query, $create_nhs_trust_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "RP07", "name" => $nhs_trust });
- }
- if (!defined($created_nhs_trust_location{$nhs_trust_location})) {
- # Create unique NHS Trust Location node
- ($neo4p_query, $last_known_query) =
- ($create_nhs_trust_location_query
- , $create_nhs_trust_location_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "location_level_01" => $nhs_trust_location });
- # Create relationship between NHS Trust/Location
- ($neo4p_query, $last_known_query) =
- ($create_nhs_trust_location_rel_query
- , $create_nhs_trust_location_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- {"name" => $nhs_trust, "location_level_01" => $nhs_trust_location });
- $created_nhs_trust_location{$nhs_trust_location} = 1;
- }
- # Create unique Incident node
- ($neo4p_query, $last_known_query) =
- ($create_incident_query, $create_incident_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "incident_timestamp" => $incident_timestamp
- , "incident_description" => $incident_description });
- # Create unique Incident node NRLS_DATA_TYPE relationship ('INO7')
- ($neo4p_query, $last_known_query) =
- ($create_incident_ndt_rel_query, $create_incident_ndt_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id, "code" => "IN07" });
- # Create relationship to NHS Trust ${nhs_trust} ${incident_id}
- ($neo4p_query, $last_known_query) =
- ($create_incident_nhs_trust_rel_query
- , $create_incident_nhs_trust_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "name" => $nhs_trust, "incident_id" => $incident_id });
- # Create relationship to NHS Trust Location
- ($neo4p_query, $last_known_query) =
- ($create_incident_ntl_rel_query, $create_incident_ntl_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "location_level_01" => $nhs_trust_location
- , "incident_id" => $incident_id });
- # Create unique Incident Category node
- ($neo4p_query, $last_known_query) =
- ($create_incident_category_query, $create_incident_category_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "IN05_lvl1"
- , "category_level_01" => $incident_category });
- # Create unique Incident Category relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_category_rel_query
- , $create_incident_category_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "category_level_01" => $incident_category });
- # Create unique Incident Speciality node
- ($neo4p_query, $last_known_query) =
- ($create_incident_speciality_query, $create_incident_speciality_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "PD05_lvl1"
- , "speciality_level_01" => $incident_speciality });
- # Create unique Incident Speciality relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_speciality_rel_query
- , $create_incident_speciality_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "speciality_level_01" => $incident_speciality });
- # Create unique Incident Reporter node
- ($neo4p_query, $last_known_query) =
- ($create_incident_reporter_query, $create_incident_reporter_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "ST01_LVL1"
- , "reporter_level_01" => $incident_reporter });
- # Create unique Incident Reporter relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_reporter_rel_query
- , $create_incident_reporter_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "reporter_level_01" => $incident_reporter });
- # Create unique Patient node
- ($neo4p_query, $last_known_query) =
- ($create_incident_patient_query
- , $create_incident_patient_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "Age_at_Incident"
- , "patient_age" => $patient_age
- , "patient_sex" => $patient_sex
- , "patient_ethnicity" => $patient_ethnicity});
- # Create unique Incident Patient relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_patient_rel_query
- , $create_incident_patient_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "patient_age" => $patient_age
- , "patient_sex" => $patient_sex
- , "patient_ethnicity" => $patient_ethnicity });
- # Create unique Degree Of Harm node
- ($neo4p_query, $last_known_query) =
- ($create_incident_doh_query
- , $create_incident_doh_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "code" => "PD09"
- , "degree_of_harm" => $degree_of_harm });
- # Create unique Degree Of Harm relationship
- ($neo4p_query, $last_known_query) =
- ($create_incident_doh_rel_query
- , $create_incident_doh_rel_cql);
- &run_query(
- $last_known_query,
- $neo4p_query,
- { "incident_id" => $incident_id
- , "degree_of_harm" => $degree_of_harm});
- } catch {
- my $current_row_number = $row->row_number() + 1;
- say STDERR "Failure at row number $current_row_number in $input_file : $_.";
- say STDERR "Last known query $last_known_query."
- if (defined($last_known_query));
- if (defined($neo4p_query) and defined($neo4p_query->errstr)) {
- say STDERR "Last known neo4p query error: " , $neo4p_query->errstr;
- }
- foreach my $required_column (@required_columns) {
- if (defined($header_map{$required_column}) and
- defined($values[$header_map{$required_column}])) {
- say STDERR "Column $required_column : " , $values[$header_map{$required_column}];
- }
- }
- };
- unless ($row->row_number() % 500) {
- say "Processed " , $row->row_number() , " rows. " , scalar localtime;
- }
- }
- sub run_query {
- my ($cql, $query, $params) = @_;
- $query->execute($params)
- or die("Could not execute query $cql");
- }
Advertisement
Add Comment
Please, Sign In to add comment