Advertisement
rfv123

Q37719160/compare-the-data-of-two-csv-file

Jun 20th, 2016
437
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 9.90 KB | None | 0 0
  <?php // http://stackoverflow.com/questions/37719160/compare-the-data-of-two-csv-file

  include __DIR__ .'/__bootstrap__.php';

  /* --------------------------------------------------------------------------------------
   *  Sample data
   *
   *  Two small record sets standing in for the CSV files being compared.
   *  Every record carries the same four columns, in this order.
   */
  $columns = array('name', 'whenDone', 'amount', 'email');

  $masterRows = array(
      array('First last',     '2016-03-03', 12000, 'sample@y.com'),
      array('John Matched',   '2016-04-01', 12345, 'johnMatched@y.com'),
      array('Jane Unmatched', '2016-05-02', 12345, 'janeUnmatched@y.com'),
      array('Jane Matched',   '2016-04-01', 12345, 'janeMatched@y.com'),
  );
  foreach ($masterRows as $values) {
      $master[] = array_combine($columns, $values);
  }

  $transactionRows = array(
      array('Mary Lamb',  '2016-03-04', 12000, 'maryl@y.com'),
      array('John Doe',   '2016-04-01', 12345, 'johndoe@y.com'),
      array('micky mean', '2016-04-01', 12345, 'mickym@y.com'),
  );
  foreach ($transactionRows as $values) {
      $transaction[] = array_combine($columns, $values);
  }

  // \Kint::dump($master, $transaction);

  // Show both inputs inside a single <pre> element.
  echo '<pre>Master Source...' . PHP_EOL;
  print_r($master);
  echo 'Transaction Source... ' . PHP_EOL;
  print_r($transaction);
  echo '</pre>';

  /* --------------------------------------------------------------------------------------
   *  Run the matcher
   *
   *  The third argument names the fields that make up the match key, so the
   *  class can be used unchanged with records of any shape.
   */
  $match = new HashMatch($master, $transaction, array('whenDone', 'amount'));
  $match->generateMatches();

  /* --------------------------------------------------------------------------------------
   *  Reports, printed in this order:
   *   1) every master hash
   *   2) the hashes of transactions that matched a master
   *   3) the master hashes shared by more than one master record
   */
  $reports = array(
      '<pre>Hash Master ... '
          => $match->getHashMasterList(),
      '<pre>Hash Transactions ... '
          => $match->getHashTransactionList(),
      '<pre>Hash Master Records with multiple Matching Masters ... '
          => $match->getHashMatchedMasterList(),
  );
  foreach ($reports as $heading => $list) {
      echo $heading . PHP_EOL;
      print_r($list);
      echo '</pre>';
  }

  /* --------------------------------------------------------------------------------------
   *  For every hash that matched, turn the hash back into the source records:
   *   1) the list of master records with that hash
   *   2) the list of transaction records with that hash
   */
  foreach (array_keys($match->getHashTransactionList()) as $hash) {
      $matched = $match->getMatchedRecords($hash);

      echo '<pre>Matching Master to Transaction... ' . PHP_EOL;
      print_r($matched);
      echo '</pre>';
  }
  73.  
  74. // ---------------------------------------------------------------------------------
  class HashMatch {

      /*
       * Matches "transaction" records against "master" records.
       *
       * A key is built for every record from a configurable list of its fields
       * and hashed with MD5. Records whose hashes are equal are considered a
       * match. The hashes are opaque identifiers: obtain them from this class
       * (the list getters or generateHash()) rather than computing them yourself.
       */

      /**
       * Master source records, indexed by record id (the array key).
       *
       * @var array
       */
      private $master = array();

      /**
       * Transaction source records, indexed by record id (the array key).
       *
       * They must use the same field names as the master records for the
       * fields listed in $keyNames, otherwise nothing can match.
       *
       * @var array
       */
      private $transaction = array();

      /**
       * Master index: hash => array('masterId' => id, 'matchIds' => array(ids)).
       *
       * 'masterId' is the first master record seen with the hash and
       * 'matchIds' lists every master record (including that first one)
       * sharing it.
       *
       * @var array
       */
      private $hashMaster = array();

      /**
       * Transaction index: hash => array('masterId' => id, 'matchIds' => array(ids)).
       *
       * Only hashes that also exist in the master index are stored.
       * 'matchIds' lists the transaction records with that hash.
       *
       * @var array
       */
      private $hashTransaction = array();

      /**
       * Names of the record fields that are combined to build the hash.
       *
       * @var array
       */
      private $keyNames = array();

      /**
       * Store the two record sets and the names of the fields to match on.
       *
       * Passing the field names lets any kind of record be used with this
       * class without editing it. A record that lacks one of the named fields
       * contributes an empty value for that field.
       *
       * @param array $master       master records, keyed by record id
       * @param array $transaction  transaction records, keyed by record id
       * @param array $keyNames     field names used to build the hash
       */
      public function __construct(array $master,
                                  array $transaction,
                                  array $keyNames = array('when', 'amount'))
      {
          $this->master      = $master;
          $this->transaction = $transaction;
          $this->keyNames    = $keyNames;
      }

      /**
       * Build both indexes: masters first, then the transactions that match them.
       *
       * Safe to call more than once; each call rebuilds the indexes from scratch.
       *
       * @return bool true when there is at least one master hash and at least
       *              one transaction matched a master
       */
      public function generateMatches()
      {
          $this->processMaster();
          $this->processTransaction();
          return !empty($this->hashMaster) && !empty($this->hashTransaction);
      }

      /**
       * Build the master index.
       *
       * The first master record with a given hash becomes its 'masterId';
       * every master record with that hash is appended to 'matchIds'.
       *
       * @return void
       */
      public function processMaster()
      {
          // Start clean so a repeated call cannot append the same ids twice.
          $this->hashMaster = array();

          foreach ($this->master as $recordId => $data) {
              $hash = $this->generateHash($data);

              if (!isset($this->hashMaster[$hash])) { // first record with this hash
                  $this->hashMaster[$hash] = array(
                      'masterId' => $recordId,
                      'matchIds' => array(),
                  );
              }

              $this->hashMaster[$hash]['matchIds'][] = $recordId;
          }
      }

      /**
       * Build the transaction index.
       *
       * Transactions whose hash is not present in the master index are skipped,
       * so processMaster() has to run first for anything to match.
       *
       * @return void
       */
      public function processTransaction()
      {
          // Start clean so a repeated call cannot append the same ids twice.
          $this->hashTransaction = array();

          foreach ($this->transaction as $recordId => $data) {
              $hash = $this->generateHash($data);

              if (!isset($this->hashMaster[$hash])) { // no master with this hash
                  continue;
              }

              if (!isset($this->hashTransaction[$hash])) { // first match for this hash
                  $this->hashTransaction[$hash] = array(
                      'masterId' => $this->hashMaster[$hash]['masterId'],
                      'matchIds' => array(),
                  );
              }

              $this->hashTransaction[$hash]['matchIds'][] = $recordId;
          }
      }

      /**
       * Return the master index.
       *
       * Each key is a hash; each value holds:
       *
       *   'masterId'  ==> the first master record found with this hash
       *
       *   'matchIds'  ==> a *complete* list of the master records with this
       *                   hash. It includes 'masterId' itself, so the list
       *                   can be used as-is when reporting.
       *
       * @return array
       */
      public function getHashMasterList()
      {
          return $this->hashMaster;
      }

      /**
       * Return only the master index entries shared by two or more masters,
       * i.e. master records that duplicate each other on the key fields.
       *
       * @return array
       */
      public function getHashMatchedMasterList()
      {
          $out = array();
          foreach ($this->hashMaster as $key => $item) {
              if (count($item['matchIds']) >= 2) {
                  $out[$key] = $item;
              }
          }
          return $out;
      }

      /**
       * Return the transaction index: every hash for which at least one
       * transaction matched a master record.
       *
       * @return array
       */
      public function getHashTransactionList()
      {
          return $this->hashTransaction;
      }

      /**
       * Convert a hash back into the source records that produced it.
       *
       * The result has three entries:
       *
       *   'key'          ==> the hash that was asked for
       *   'master'       ==> list of master records with that hash
       *   'transaction'  ==> list of transaction records with that hash
       *
       * Both lists are empty when the hash is unknown.
       *
       * @param string $hash
       *
       * @return array
       */
      public function getMatchedRecords($hash)
      {
          $out = array('key'         => $hash,
                       'master'      => array(),
                       'transaction' => array(),
                      );

          if (isset($this->hashMaster[$hash])) { // unknown hashes yield empty lists
              foreach ($this->hashMaster[$hash]['matchIds'] as $recordId) {
                  $out['master'][] = $this->master[$recordId];
              }
          }

          if (isset($this->hashTransaction[$hash])) {
              foreach ($this->hashTransaction[$hash]['matchIds'] as $recordId) {
                  $out['transaction'][] = $this->transaction[$recordId];
              }
          }

          return $out;
      }

      /**
       * Generate the MD5 hash for one record from the fields named in $keyNames.
       *
       * Each field value is cast to a string, so 12345 and '12345' hash the
       * same, and is written as "<byte length>:<value>". The length prefix
       * keeps field boundaries unambiguous: plain concatenation would give
       * ('1', '23') and ('12', '3') the same hash and report a false match.
       *
       * A field that is missing from the record (or is null) is treated as an
       * empty value instead of raising an undefined-index warning.
       *
       * @param  array  $row
       *
       * @return string 32 character hexadecimal MD5 hash
       */
      public function generateHash($row)
      {
          $text = '';
          foreach ($this->keyNames as $name) {
              $value = isset($row[$name]) ? (string) $row[$name] : '';
              $text .= strlen($value) . ':' . $value;
          }
          return md5($text);
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement