Advertisement
Guest User

Untitled

a guest
Sep 9th, 2016
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.83 KB | None | 0 0
  ########################################################################
  # This program starts with the list of all CIKs with certain counts
  # and crawls the 8-K statements.
  #
  # Input:  statsEDGAR.dat in the current directory, one comma-separated
  #         record per line.  Fields used (0-based):
  #           0 - path of the index file listing that CIK's filings
  #           1 - directory name used below Reports
  #           2 - the CIK
  #           5 - count; the record is processed only when it is > 0
  #         Each index file is comma-separated too; field 2 is matched
  #         against 8K / 8-K and field 4 is the path below the archive
  #         base URL.
  # Output: every matching filing is fetched and saved as
  #         Reports/<directory>/<CIK>/<4th component of the archive path>.
  ########################################################################

  use strict;
  use warnings;

  use File::Path qw(make_path);
  use File::Spec;
  use WWW::Mechanize;

  my $STATS_FILE   = 'statsEDGAR.dat';
  my $REPORTS_ROOT = 'Reports';
  my $ARCHIVE_BASE = 'https://www.sec.gov/Archives/';

  # Slurp a text file and return a reference to its lines with the line
  # endings (LF or CRLF) removed.  Returns nothing when the file cannot
  # be opened; $! then still holds the reason for the caller to report.
  sub read_lines {
      my ($path) = @_;

      open(my $fh, '<', $path) or return;
      my @lines = <$fh>;
      close($fh);

      s/\r?\n\z// for @lines;
      return \@lines;
  }

  # True when $value is defined and numerically greater than zero.
  # Non-numeric text is compared the way the bare `>` operator would
  # (leading digits count, anything else is 0), just without the warning.
  sub is_positive {
      my ($value) = @_;

      return 0 unless defined $value;
      no warnings 'numeric';
      return $value > 0 ? 1 : 0;
  }

  my $records = read_lines($STATS_FILE)
      or die "Cannot open $STATS_FILE: $!\n";

  # One agent serves every download.  autocheck => 0 keeps a failed
  # request from dying (the response is inspected below instead), and
  # stack_depth => 0 stops the agent from keeping a page history.
  my $mech = WWW::Mechanize->new( autocheck => 0, stack_depth => 0 );

  RECORD:
  for my $record (@$records) {
      my ($index_file, $dirname, $cik, $count)
          = (split /,/, $record)[0, 1, 2, 5];

      # Require the CIK to have some 8-K filed.
      next RECORD unless is_positive($count);

      # Create Reports/<directory>/<CIK>, including missing parents,
      # without going through a shell.  An existing directory is fine.
      my $target_dir = File::Spec->catdir($REPORTS_ROOT, $dirname, $cik);
      make_path($target_dir, { error => \my $mkdir_errors });
      if ($mkdir_errors && @$mkdir_errors) {
          my ($failed_path, $reason) = %{ $mkdir_errors->[0] };
          warn "Skipping CIK $cik: cannot create $failed_path: $reason\n";
          next RECORD;
      }

      # Read in the index file for this CIK.
      my $filings = read_lines($index_file);
      if (!$filings) {
          warn "Skipping CIK $cik: cannot open $index_file: $!\n";
          next RECORD;
      }

      # Crawl each occurrence of an 8-K filing.
      FILING:
      for my $filing (@$filings) {
          my ($form_type, $archive_path) = (split /,/, $filing)[2, 4];

          next FILING unless defined $form_type && $form_type =~ /8-?K/;

          if (!defined $archive_path) {
              warn "Skipping filing without an archive path: $filing\n";
              next FILING;
          }

          # The saved file is named after the fourth '/'-separated
          # component of the archive path.
          my $basename = (split m{/}, $archive_path)[3];
          if (!defined $basename || $basename eq '') {
              warn "Skipping unexpected archive path: $archive_path\n";
              next FILING;
          }

          my $local_file = File::Spec->catfile($target_dir, $basename);

          # This is the file from the EDGAR archives.
          my $url = $ARCHIVE_BASE . $archive_path;

          print "Downloading $url\n";

          # Fetch the document and stream it straight to the hard drive.
          my $response = $mech->get($url, ':content_file' => $local_file);

          if ($response->is_success) {
              print "Writing $local_file\n\n";
          }
          else {
              warn "Failed to download $url: ",
                  $response->status_line, "\n\n";
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement