Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php // http://stackoverflow.com/questions/37719160/compare-the-data-of-two-csv-file
- include __DIR__ .'/__bootstrap__.php';
- //this shows the name, email, deposit date and amount from payment.csv
- $master[] = array('name' => 'First last', 'whenDone' => '2016-03-03', 'amount' => 12000, 'email' => 'sample@y.com', );
- $master[] = array('name' => 'John Matched', 'whenDone' => '2016-04-01', 'amount' => 12345, 'email' => 'johnMatched@y.com');
- $master[] = array('name' => 'Jane Unmatched', 'whenDone' => '2016-05-02', 'amount' => 12345, 'email' => 'janeUnmatched@y.com');
- $master[] = array('name' => 'Jane Matched', 'whenDone' => '2016-04-01', 'amount' => 12345, 'email' => 'janeMatched@y.com');
- $transaction[] = array('name' => 'Mary Lamb', 'whenDone' => '2016-03-04', 'amount' => 12000, 'email' => 'maryl@y.com');
- $transaction[] = array('name' => 'John Doe', 'whenDone' => '2016-04-01', 'amount' => 12345, 'email' => 'johndoe@y.com');
- $transaction[] = array('name' => 'micky mean', 'whenDone' => '2016-04-01', 'amount' => 12345, 'email' => 'mickym@y.com');
- // \Kint::dump($master, $transaction);
- echo '<pre>Master Source...', PHP_EOL;
- print_r($master);
- echo 'Transaction Source... ', PHP_EOL;
- print_r($transaction);
- echo '</pre>';
- /* --------------------------------------------------------------------------------------
- * Run the code...
- */
- // No need to change the code (yours or mine) :)
- // !!!! You can pass the names of the fields to be used to generate the key
- $match = new HashMatch($master,
- $transaction,
- array('whenDone', 'amount'));
- $match->generateMatches();
- // print output...
- /* --------------------------------------------------------------------------------------
- * Generated Hash Master List
- */
- echo '<pre>Hash Master ... ', PHP_EOL;
- print_r($match->getHashMasterList());
- echo '</pre>';
- /* --------------------------------------------------------------------------------------
- * Generated Hash Transaction List that Matches with a master
- */
- echo '<pre>Hash Transactions ... ', PHP_EOL;
- print_r($match->getHashTransactionList());
- echo '</pre>';
- /* --------------------------------------------------------------------------------------
- * Generated Hash List of Masters that match with other masters
- */
- echo '<pre>Hash Master Records with multiple Matching Masters ... ', PHP_EOL;
- print_r($match->getHashMatchedMasterList());
- echo '</pre>';
- /* --------------------------------------------------------------------------------------
- * Matching Master And transaction Records ALL with the same hash:
- * 1) A list of Master Records
- * 2) A list of Transaction Records
- *
- * i.e. it converts the hash key back into record ids :)
- */
- foreach($match->getHashTransactionList() as $hash => $recordIds) {
- echo '<pre>Matching Master to Transaction... ', PHP_EOL;
- print_r($match->getMatchedRecords($hash));
- echo '</pre>';
- }
- // ---------------------------------------------------------------------------------
- class HashMatch {
- /*
- * Generate a MD5 hash for each master and tranasaction using some
- * of the data fields as the string to be hashed.
- */
- /**
- * Master source records
- *
- * @var array
- */
- private $master = null;
- /**
- * Transaction Source records must have the same field names as the master
- * of the indexes that are used to generate the MD5 hash
- *
- * @var array
- */
- private $transaction = null;
- /**
- * The generated MD5 hash is the key in the Master source records.
- *
- * Each record has a list of other Master Record Ids that also have the same hash
- *
- * @var array
- */
- private $hashMaster = array();
- /**
- * The generated MD5 hash is the key in the Transaction source records.
- *
- * Each record has a list of other Transaction Record Ids that also have the same hash
- *
- * @var array
- */
- private $hashTransaction = array();
- /**
- * Specify which index names to use from the supplied data record arrays
- * to generate the MD5 hash with.
- *
- * @var array
- */
- private $keyNames = array();
- /**
- * Generate a MD5 hash for each master and transaction using some
- * of the data fields as the string to be hashed.
- *
- * You can pass an array of field names to used to generate the key.
- *
- * This allows any records to be used in this class as you just provide
- * the list of names to generate the MD5 hash
- *
- *
- * @param array $master
- * @param array $transaction
- * @param array $keyNames
- *
- * @return void
- */
- public function __construct(array $master,
- array $transaction,
- array $keyNames = array('when', 'amount'))
- {
- $this->master = $master;
- $this->transaction = $transaction;
- $this->keyNames = $keyNames;
- }
- /**
- * Generate all the Hashes and store all the matching details
- *
- * @return bool
- */
- public function generateMatches()
- {
- $this->processMaster();
- $this->processTransaction();
- return !empty($this->hashMaster) && !empty($this->hashTransaction);
- }
- /**
- * Generate a list of MD5 hashes as a key
- *
- * Keep a list of other master records with the same hash
- *
- * @return void
- */
- public function processMaster()
- {
- foreach ($this->master as $recordId => $data) {
- $hash = $this->generateHash($data);
- if (empty($this->hashMaster[$hash])) { // add it...
- $this->hashMaster[$hash]['masterId'] = $recordId;
- $this->hashMaster[$hash]['matchIds'] = array($recordId);
- }
- else { // is a duplicate so add to the match list
- $this->hashMaster[$hash]['matchIds'][] = $recordId;
- }
- }
- }
- /**
- * Generate a list of MD5 hashes as a key for the Transaction source
- *
- * Match the hashes against the master list and record if there is a match
- *
- * @return void
- */
- public function processTransaction()
- {
- foreach ($this->transaction as $recordId => $data) {
- $hash = $this->generateHash($data);
- if (empty($this->hashMaster[$hash])) { // skip this record
- continue;
- }
- // record a match with the master
- if (empty($this->hashTransaction[$hash])) { // new record
- $this->hashTransaction[$hash]['masterId'] = $this->hashMaster[$hash]['masterId'];
- $this->hashTransaction[$hash]['matchIds'] = array();
- }
- // add to the list of matches
- $this->hashTransaction[$hash]['matchIds'][] = $recordId;
- }
- }
- /**
- * Return Master MD5 list
- *
- * The keys are unique, however there are extra values:
- *
- * 'masterId' ==> The first record in the array with this key
- *
- * 'matchIds' ==> A *complete* list of all the master records that have this key.
- * Yes, it includes itself, this allows you to just use this list
- * when reporting.
- *
- * @return array
- */
- public function getHashMasterList()
- {
- return $this->hashMaster;
- }
- /**
- * Return Master MD5 list with more that one matching master
- *
- * i.e. duplicate master records with the same hash
- *
- * @return array
- */
- public function getHashMatchedMasterList()
- {
- $out = array();
- foreach ($this->hashMaster as $key => $item) {
- if (count($item['matchIds']) >= 2) {
- $out[$key] = $item;
- }
- }
- return $out;
- }
- /**
- * All the tranasactions that matched a master record
- *
- * @return array
- */
- public function getHashTransactionList()
- {
- return $this->hashTransaction;
- }
- /**
- * given a master hash then return the details as:
- *
- * i.e. this converts a hash key back into source records for processing.
- *
- * 1) A list of matching master records
- *
- * e.g. $out['master'][] ...
- *
- *
- * 2) A list of matching transaction records
- *
- * e.g. $out['transaction'][] ...
- *
- * @param string $hash
- *
- * @return array
- */
- public function getMatchedRecords($hash)
- {
- $out = array('key' => $hash,
- 'master' => array(),
- 'transaction' => array(),
- );
- if (!empty($this->hashMaster[$hash])) { // just in case is invalid hash
- foreach ($this->hashMaster[$hash]['matchIds'] as $recordId) {
- $out['master'][] = $this->master[$recordId];
- }
- }
- if (!empty($this->hashTransaction[$hash])) {
- foreach ($this->hashTransaction[$hash]['matchIds'] as $recordId) {
- $out['transaction'][] = $this->transaction[$recordId];
- }
- }
- return $out;
- }
- /**
- * Generate an MD5 hash from the required fields in the data record
- * The columns to use will have been passed in the constructor
- * and found in '$keyNames'
- *
- * It is so you don't have to edit anything to use this class
- *
- * @param array $row
- *
- * @return string
- */
- public function generateHash($row)
- {
- $text = '';
- foreach ($this->keyNames as $name) {
- $text .= $row[$name];
- }
- return Md5($text);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement