Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
########################################################################
# This program starts with the list of all CIKs with certain counts
# and crawls the 8-K statements.
#
# Input:  statsEDGAR.dat -- one comma-separated record per CIK:
#           field 0: path to the index file for that CIK
#           field 1: sub-directory name used under Reports
#           field 2: the CIK
#           field 5: count of 8-K filings (records with 0 are skipped)
#         Each index file is comma-separated as well:
#           field 2: form type (matched against "8K" / "8-K")
#           field 4: path of the filing, appended to
#                    https://www.sec.gov/Archives/
# Output: Reports/<dir>/<cik>/<name>, where <name> is the fourth
#         "/"-separated component of the filing path.
########################################################################
use strict;
use warnings;

use File::Path qw(make_path);
use File::Spec;
use WWW::Mechanize;

my $stats_file  = 'statsEDGAR.dat';
my $archive_url = 'https://www.sec.gov/Archives/';

open(my $stats_fh, '<', $stats_file)
    or die "Cannot open $stats_file: $!\n";

# One crawler serves the whole run.  autocheck is off so that a failed
# download is reported below instead of aborting the crawl.
# NOTE(review): sec.gov is reported to reject clients that do not send a
# descriptive User-Agent -- confirm whether an `agent` option is needed.
my $mech = WWW::Mechanize->new( autocheck => 0 );

STAT:
while (my $stat_line = <$stats_fh>) {
    chomp $stat_line;
    my ($index_file, $dirname, $cik, $count_8k)
        = (split /,/, $stat_line)[0, 1, 2, 5];

    # Require the CIK to have some 8-K filed.  The count is compared
    # numerically exactly as before; non-numeric text counts as 0.
    my $has_8k = do {
        no warnings 'numeric';
        defined $count_8k && $count_8k > 0;
    };
    next STAT unless $has_8k;

    # Create Reports/<dir>/<cik>, including missing parents, without
    # passing data fields through a shell.
    my $report_dir = File::Spec->catdir('Reports', $dirname, $cik);
    make_path($report_dir, { error => \my $mkdir_errors });
    if (@{$mkdir_errors}) {
        for my $diag (@{$mkdir_errors}) {
            my ($path, $message) = %{$diag};
            warn "Cannot create $report_dir: $message\n";
        }
        next STAT;
    }

    # Read the index file that lists this CIK's filings.
    my $index_fh;
    unless (open($index_fh, '<', $index_file)) {
        warn "Cannot open $index_file: $!\n";
        next STAT;
    }

    # Crawl each occurrence of an 8-K filing.
    FILING:
    while (my $filing_line = <$index_fh>) {
        # Strip the newline up front so it cannot leak into the URL.
        chomp $filing_line;
        my ($form_type, $archive_path)
            = (split /,/, $filing_line)[2, 4];

        next FILING unless defined $form_type && $form_type =~ /8-?K/;
        next FILING unless defined $archive_path && length $archive_path;

        # The output file is named after the fourth path component.
        my $basename = (split m{/}, $archive_path)[3];
        next FILING unless defined $basename && length $basename;

        my $out_file = File::Spec->catfile($report_dir, $basename);
        my $url      = $archive_url . $archive_path;

        print "Downloading $url\n";

        # This crawls the file and saves it to the hard drive.
        $mech->get($url, ':content_file' => $out_file);
        if ($mech->success) {
            print "Writing $out_file\n\n";
        }
        else {
            warn "Failed to download $url: ",
                $mech->response->status_line, "\n\n";
        }
    }
    close($index_fh);
}
close($stats_fh);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement