Advertisement
Guest User

Untitled

a guest
Mar 6th, 2015
211
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 31.70 KB | None | 0 0
  1. package CATS::Problem::Repository;
  2.  
  3. use strict;
  4. use warnings;
  5. use POSIX qw(strftime);
  6. use File::Temp qw(tempdir);
  7. use Archive::Zip qw(:ERROR_CODES);
  8. use Fcntl ':mode';
  9. use File::Path;
  10. use File::Copy::Recursive qw(dircopy);
  11. use CATS::Problem::Authors;
  12. use Data::Dumper;
  13. use CATS::Utils qw(escape_html file_type file_type_long);
  14.  
  15. my $tmp_template = 'zipXXXXXX';
  16.  
  17. sub parse_author
  18. {
  19.     my $author = Encode::encode_utf8($_[0]);
  20.     $author = DEFAULT_AUTHOR if !defined $author || $author eq '';
  21.     $author = (split ',', $author)[0];
  22.     chomp $author;
  23.     $author =~ m/^(.*?)\s*(\(.*\))*$/;
  24.     return $1;
  25. }
  26.  
  27. # escape tabs (convert tabs to spaces)
  28. sub untabify
  29. {
  30.     my $line = shift;
  31.  
  32.     while ((my $pos = index($line, "\t")) != -1) {
  33.         if (my $count = (8 - ($pos % 8))) {
  34.             my $spaces = ' ' x $count;
  35.             $line =~ s/\t/$spaces/;
  36.         }
  37.     }
  38.  
  39.     return $line;
  40. }
  41.  
  42. # git may return quoted and escaped filenames
  43. sub unquote
  44. {
  45.     my $str = shift;
  46.  
  47.     sub unq {
  48.         my $seq = shift;
  49.         my %es = ( # character escape codes, aka escape sequences
  50.             't' => "\t",   # tab            (HT, TAB)
  51.             'n' => "\n",   # newline        (NL)
  52.             'r' => "\r",   # return         (CR)
  53.             'f' => "\f",   # form feed      (FF)
  54.             'b' => "\b",   # backspace      (BS)
  55.             'a' => "\a",   # alarm (bell)   (BEL)
  56.             'e' => "\e",   # escape         (ESC)
  57.             'v' => "\013", # vertical tab   (VT)
  58.         );
  59.  
  60.         if ($seq =~ m/^[0-7]{1,3}$/) {
  61.             # octal char sequence
  62.             return chr(oct($seq));
  63.         } elsif (exists $es{$seq}) {
  64.             # C escape sequence, aka character escape code
  65.             return $es{$seq};
  66.         }
  67.         # quoted ordinary character
  68.         return $seq;
  69.     }
  70.  
  71.     if ($str =~ m/^"(.*)"$/) {
  72.         # needs unquoting
  73.         $str = $1;
  74.         $str =~ s/\\([^0-7]|[0-7]{1,3})/unq($1)/eg;
  75.     }
  76.     return $str;
  77. }
  78.  
  79. # Make control characters "printable", using character escape codes (CEC)
  80. sub quot_cec
  81. {
  82.     my $cntrl = shift;
  83.     my %opts = @_;
  84.     my %es = ( # character escape codes, aka escape sequences
  85.         "\t" => '\t',   # tab            (HT)
  86.         "\n" => '\n',   # line feed      (LF)
  87.         "\r" => '\r',   # carrige return (CR)
  88.         "\f" => '\f',   # form feed      (FF)
  89.         "\b" => '\b',   # backspace      (BS)
  90.         "\a" => '\a',   # alarm (bell)   (BEL)
  91.         "\e" => '\e',   # escape         (ESC)
  92.         "\013" => '\v', # vertical tab   (VT)
  93.         "\000" => '\0', # nul character  (NUL)
  94.     );
  95.     my $chr = ( (exists $es{$cntrl})
  96.             ? $es{$cntrl}
  97.             : sprintf('\%2x', ord($cntrl)) );
  98.     if ($opts{-nohtml}) {
  99.         return $chr;
  100.     } else {
  101.         return "<span class=\"cntrl\">$chr</span>";
  102.     }
  103. }
  104.  
  105. sub parse_date
  106. {
  107.     my $epoch = shift;
  108.     my $tz = shift || "-0000";
  109.  
  110.     my %date;
  111.     my @months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
  112.     my @days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat");
  113.     my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($epoch);
  114.     $date{'hour'} = $hour;
  115.     $date{'minute'} = $min;
  116.     $date{'mday'} = $mday;
  117.     $date{'day'} = $days[$wday];
  118.     $date{'month'} = $months[$mon];
  119.     $date{'rfc2822'}   = sprintf "%s, %d %s %4d %02d:%02d:%02d +0000",
  120.                          $days[$wday], $mday, $months[$mon], 1900+$year, $hour ,$min, $sec;
  121.     $date{'mday-time'} = sprintf "%d %s %02d:%02d",
  122.                          $mday, $months[$mon], $hour ,$min;
  123.     $date{'iso-8601'}  = sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ",
  124.                          1900+$year, 1+$mon, $mday, $hour ,$min, $sec;
  125.  
  126.     my ($tz_sign, $tz_hour, $tz_min) =
  127.         ($tz =~ m/^([-+])(\d\d)(\d\d)$/);
  128.     $tz_sign = ($tz_sign eq '-' ? -1 : +1);
  129.     my $local = $epoch + $tz_sign*((($tz_hour*60) + $tz_min)*60);
  130.     ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($local);
  131.     $date{'hour_local'} = $hour;
  132.     $date{'minute_local'} = $min;
  133.     $date{'tz_local'} = $tz;
  134.     $date{'iso-tz'} = sprintf("%04d-%02d-%02d %02d:%02d:%02d %s",
  135.                               1900+$year, $mon+1, $mday,
  136.                               $hour, $min, $sec, $tz);
  137.     return \%date;
  138. }
  139.  
  140.  
  141. # is current raw difftree line of file deletion
  142. sub is_deleted
  143. {
  144.     my ($self, $diffinfo) = @_;
  145.  
  146.     return defined $diffinfo->{to_id} && $diffinfo->{to_id} eq ('0' x 40);
  147. }
  148.  
  149. ## HTML aware string manipulation
  150.  
  151. # Try to chop given string on a word boundary between position
  152. # $len and $len+$add_len. If there is no word boundary there,
  153. # chop at $len+$add_len. Do not chop if chopped part plus ellipsis
  154. # (marking chopped part) would be longer than given string.
  155. sub chop_str
  156. {
  157.     my $str = shift;
  158.     my $len = shift;
  159.     my $add_len = shift || 10;
  160.     my $where = shift || 'right'; # 'left' | 'center' | 'right'
  161.  
  162.     # Make sure perl knows it is utf8 encoded so we don't
  163.     # cut in the middle of a utf8 multibyte char.
  164.     # $str = to_utf8($str);
  165.     $str = Encode::decode_utf8($str);
  166.  
  167.     # allow only $len chars, but don't cut a word if it would fit in $add_len
  168.     # if it doesn't fit, cut it if it's still longer than the dots we would add
  169.     # remove chopped character entities entirely
  170.  
  171.     # when chopping in the middle, distribute $len into left and right part
  172.     # return early if chopping wouldn't make string shorter
  173.     if ($where eq 'center') {
  174.         return $str if ($len + 5 >= length($str)); # filler is length 5
  175.         $len = int($len/2);
  176.     } else {
  177.         return $str if ($len + 4 >= length($str)); # filler is length 4
  178.     }
  179.  
  180.     # regexps: ending and beginning with word part up to $add_len
  181.     my $endre = qr/.{$len}\w{0,$add_len}/;
  182.     my $begre = qr/\w{0,$add_len}.{$len}/;
  183.  
  184.     if ($where eq 'left') {
  185.         $str =~ m/^(.*?)($begre)$/;
  186.         my ($lead, $body) = ($1, $2);
  187.         if (length($lead) > 4) {
  188.             $lead = " ...";
  189.         }
  190.         return "$lead$body";
  191.  
  192.     } elsif ($where eq 'center') {
  193.         $str =~ m/^($endre)(.*)$/;
  194.         my ($left, $str)  = ($1, $2);
  195.         $str =~ m/^(.*?)($begre)$/;
  196.         my ($mid, $right) = ($1, $2);
  197.         if (length($mid) > 5) {
  198.             $mid = " ... ";
  199.         }
  200.         return "$left$mid$right";
  201.  
  202.     } else {
  203.         $str =~ m/^($endre)(.*)$/;
  204.         my $body = $1;
  205.         my $tail = $2;
  206.         if (length($tail) > 4) {
  207.             $tail = "... ";
  208.         }
  209.         return "$body$tail";
  210.     }
  211. }
  212.  
  213. # replace invalid utf8 character with SUBSTITUTION sequence
  214. sub esc_html
  215. {
  216.     my $str = shift;
  217.     my %opts = @_;
  218.  
  219.     return undef unless defined $str;
  220.  
  221.     # $str = to_utf8($str);
  222.     $str = escape_html($str);
  223.     if ($opts{'-nbsp'}) {
  224.         $str =~ s/ /&nbsp;/g;
  225.     }
  226.     $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
  227.     return $str;
  228. }
  229.  
  230. # Highlight selected fragments of string, using given CSS class,
  231. # and escape HTML.  It is assumed that fragments do not overlap.
  232. # Regions are passed as list of pairs (array references).
  233. #
  234. # Example: esc_html_hl_regions("foobar", "mark", [ 0, 3 ]) returns
  235. # '<span class="mark">foo</span>bar'
  236. sub esc_html_hl_regions
  237. {
  238.     my ($str, $css_class, @sel) = @_;
  239.     my %opts = grep { ref($_) ne 'ARRAY' } @sel;
  240.     @sel     = grep { ref($_) eq 'ARRAY' } @sel;
  241.     return esc_html($str, %opts) unless @sel;
  242.  
  243.     my $out = '';
  244.     my $pos = 0;
  245.  
  246.     for my $s (@sel) {
  247.         my ($begin, $end) = @$s;
  248.  
  249.         # Don't create empty <span> elements.
  250.         next if $end <= $begin;
  251.  
  252.         my $escaped = esc_html(substr($str, $begin, $end - $begin),
  253.                                %opts);
  254.  
  255.         $out .= esc_html(substr($str, $pos, $begin - $pos), %opts)
  256.             if ($begin - $pos > 0);
  257.         $out .= sprint('<span class="%s">%s</span>', $css_class, $escaped);
  258.  
  259.         $pos = $end;
  260.     }
  261.     $out .= esc_html(substr($str, $pos), %opts) if ($pos < length($str));
  262.  
  263.     return $out;
  264. }
  265.  
  266.  
  267. sub diff_line_class
  268. {
  269.     my ($self, $line, $from, $to) = @_;
  270.  
  271.     # ordinary diff
  272.     my $num_sign = 1;
  273.     # combined diff
  274.     if ($from && $to && ref($from->{'href'}) eq "ARRAY") {
  275.         $num_sign = scalar @{$from->{'href'}};
  276.     }
  277.  
  278.     my @diff_line_classifier = (
  279.         { regexp => qr/^\@\@{$num_sign} /, class => "chunk_header"},
  280.         { regexp => qr/^\\/,               class => "incomplete"  },
  281.         { regexp => qr/^ {$num_sign}/,     class => "ctx" },
  282.         # classifier for context must come before classifier add/rem,
  283.         # or we would have to use more complicated regexp, for example
  284.         # qr/(?= {0,$m}\+)[+ ]{$num_sign}/, where $m = $num_sign - 1;
  285.         { regexp => qr/^[+ ]{$num_sign}/,   class => "add" },
  286.         { regexp => qr/^[- ]{$num_sign}/,   class => "rem" },
  287.     );
  288.     for my $clsfy (@diff_line_classifier) {
  289.         return $clsfy->{'class'}
  290.             if ($line =~ $clsfy->{'regexp'});
  291.     }
  292.  
  293.     # fallback
  294.     return "";
  295. }
  296.  
  297. sub format_difftree
  298. {
  299.     my ($self, @difftree) = @_;
  300.     my $difftree = [];
  301.     foreach my $diff (@difftree) {
  302.         my $difftree_line = { file => escape_html($diff->{file})};
  303.         my ($to_mode_oct, $to_mode_str, $to_file_type);
  304.         my ($from_mode_oct, $from_mode_str, $from_file_type);
  305.         if ($diff->{to_mode} ne ('0' x 6)) {
  306.             $to_mode_oct = oct $diff->{to_mode};
  307.             if (S_ISREG($to_mode_oct)) { # only for regular file
  308.                 $to_mode_str = sprintf("%04o", $to_mode_oct & 0777); # permission bits
  309.             }
  310.             $to_file_type = file_type($diff->{to_mode});
  311.         }
  312.         if ($diff->{from_mode} ne ('0' x 6)) {
  313.             $from_mode_oct = oct $diff->{from_mode};
  314.             if (S_ISREG($from_mode_oct)) { # only for regular file
  315.                 $from_mode_str = sprintf("%04o", $from_mode_oct & 0777); # permission bits
  316.             }
  317.             $from_file_type = file_type($diff->{from_mode});
  318.         }
  319.  
  320.         if ($diff->{status} eq "A") { # created
  321.             $difftree_line->{status} = 'new';
  322.             $difftree_line->{status_string} = "new $to_file_type";
  323.             $difftree_line->{status_string} .= " with mode: $to_mode_str" if $to_mode_str;
  324.         } elsif ($diff->{status} eq "D") { # deleted
  325.             $difftree_line->{status} = 'deleted';
  326.             $difftree_line->{status_string} = "deleted $from_file_type";
  327.         } elsif ($diff->{status} eq "M" || $diff->{status} eq "T") { # modified, or type changed
  328.             $difftree_line->{status} = 'changed';
  329.             if ($diff->{from_mode} != $diff->{to_mode}) {
  330.                 $difftree_line->{status_string} = 'changed';
  331.                 $difftree_line->{status_string} .= " from $from_file_type to $to_file_type" if $from_file_type ne $to_file_type;
  332.                 if (($from_mode_oct & 0777) != ($to_mode_oct & 0777)) {
  333.                     if ($from_mode_str && $to_mode_str) {
  334.                         $difftree_line->{status_string} .= " mode: $from_mode_str->$to_mode_str";
  335.                     } elsif ($to_mode_str) {
  336.                         $difftree_line->{status_string} .= " mode: $to_mode_str";
  337.                     }
  338.                 }
  339.             }
  340.         } elsif ($diff->{status} eq "R" || $diff->{status} eq "C") { # renamed or copied
  341.             $difftree_line->{file} = $diff->{to_file};
  342.             my $mode_chng = '';
  343.             if ($diff->{from_mode} != $diff->{to_mode}) {
  344.                 # mode also for directories, so we cannot use $to_mode_str
  345.                 $mode_chng = sprintf(", mode: %04o", $to_mode_oct & 0777);
  346.             }
  347.             my %status_name = ('R' => 'moved', 'C' => 'copied');
  348.             $difftree_line->{status} = $status_name{$diff->{status}};
  349.             $difftree_line->{status_string} = sprintf("%s from %s with %d%%%s", $difftree_line->{status},
  350.                                                         escape_html($diff->{from_file}), int $diff->{similarity}, $mode_chng);
  351.         } # we should not encounter Unmerged (U) or Unknown (X) status
  352.         push @{$difftree}, $difftree_line;
  353.     }
  354.     return $difftree;
  355. }
  356.  
  357. # process patch (diff) line (not to be used for diff headers),
  358. # returning HTML-formatted (but not wrapped) line.
  359. sub format_diff_line
  360. {
  361.     my ($self, $line, $diff_class, $from, $to) = @_;
  362.  
  363.     chomp $line;
  364.     $line = untabify($line);
  365.  
  366.     if ($from && $to && $line =~ m/^\@{2} /) {
  367.         $line = $self->format_unidiff_chunk_header($line, $from, $to);
  368.     } else {
  369.         $line = esc_html($line, -nbsp=>1);
  370.     }
  371.  
  372.     my $diff_classes = "diff";
  373.     $diff_classes .= " $diff_class" if ($diff_class);
  374.     return { text => $line, class=> $diff_classes };
  375. }
  376.  
  377. # assumes that $from and $to are defined and correctly filled,
  378. # and that $line holds a line of chunk header for unified diff
  379. sub format_unidiff_chunk_header
  380. {
  381.     my ($self, $line, $from, $to) = @_;
  382.  
  383.     my ($from_text, $from_start, $from_lines, $to_text, $to_start, $to_lines, $section) =
  384.         $line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;
  385.  
  386.     $from_lines = 0 unless defined $from_lines;
  387.     $to_lines   = 0 unless defined $to_lines;
  388.  
  389.     # if ($from->{'href'}) {
  390.     #     $from_text = $cgi->a({-href=>"$from->{'href'}#l$from_start",
  391.     #                          -class=>"list"}, $from_text);
  392.     # }
  393.     # if ($to->{'href'}) {
  394.     #     $to_text   = $cgi->a({-href=>"$to->{'href'}#l$to_start",
  395.     #                          -class=>"list"}, $to_text);
  396.     # }
  397.     $line = "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
  398.             "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
  399.     return $line;
  400. }
  401.  
  402. # Format removed and added line, mark changed part and HTML-format them.
  403. # Implementation is based on contrib/diff-highlight
  404. sub format_rem_add_lines_pair
  405. {
  406.     my ($self, $rem, $add) = @_;
  407.  
  408.     # We need to untabify lines before split()'ing them;
  409.     # otherwise offsets would be invalid.
  410.     chomp $rem;
  411.     chomp $add;
  412.     $rem = untabify($rem);
  413.     $add = untabify($add);
  414.  
  415.     my @rem = split(//, $rem);
  416.     my @add = split(//, $add);
  417.     my ($esc_rem, $esc_add);
  418.     # Ignore leading +/- characters for each parent.
  419.     my ($prefix_len, $suffix_len) = (1, 0);
  420.     my ($prefix_has_nonspace, $suffix_has_nonspace);
  421.  
  422.     my $shorter = (@rem < @add) ? @rem : @add;
  423.     while ($prefix_len < $shorter) {
  424.         last if ($rem[$prefix_len] ne $add[$prefix_len]);
  425.  
  426.         $prefix_has_nonspace = 1 if ($rem[$prefix_len] !~ /\s/);
  427.         $prefix_len++;
  428.     }
  429.  
  430.     while ($prefix_len + $suffix_len < $shorter) {
  431.         last if ($rem[-1 - $suffix_len] ne $add[-1 - $suffix_len]);
  432.  
  433.         $suffix_has_nonspace = 1 if ($rem[-1 - $suffix_len] !~ /\s/);
  434.         $suffix_len++;
  435.     }
  436.  
  437.     # Mark lines that are different from each other, but have some common
  438.     # part that isn't whitespace.  If lines are completely different, don't
  439.     # mark them because that would make output unreadable, especially if
  440.     # diff consists of multiple lines.
  441.     if ($prefix_has_nonspace || $suffix_has_nonspace) {
  442.         $esc_rem = esc_html_hl_regions($rem, 'marked',
  443.                 [$prefix_len, @rem - $suffix_len], -nbsp=>1);
  444.         $esc_add = esc_html_hl_regions($add, 'marked',
  445.                 [$prefix_len, @add - $suffix_len], -nbsp=>1);
  446.     } else {
  447.         $esc_rem = esc_html($rem, -nbsp=>1);
  448.         $esc_add = esc_html($add, -nbsp=>1);
  449.     }
  450.  
  451.     return $self->format_diff_line(\$esc_rem, 'rem'),
  452.            $self->format_diff_line(\$esc_add, 'add');
  453. }
  454.  
  455. # HTML-format diff context, removed and added lines.
  456. sub format_ctx_rem_add_lines
  457. {
  458.     my ($self, $ctx, $rem, $add) = @_;
  459.     my (@new_ctx, @new_rem, @new_add);
  460.  
  461.     if (@$add > 0 && @$add == @$rem) {
  462.         for (my $i = 0; $i < @$add; $i++) {
  463.             my ($line_rem, $line_add) = $self->format_rem_add_lines_pair($rem->[$i], $add->[$i]);
  464.             push @new_rem, $line_rem;
  465.             push @new_add, $line_add;
  466.         }
  467.     } else {
  468.         @new_rem = map { $self->format_diff_line($_, 'rem') } @$rem;
  469.         @new_add = map { $self->format_diff_line($_, 'add') } @$add;
  470.     }
  471.  
  472.     @new_ctx = map { format_diff_line($_, 'ctx') } @$ctx;
  473.  
  474.     return (@new_ctx, @new_rem, @new_add);
  475. }
  476.  
  477. sub format_diff_chunk
  478. {
  479.     my ($self, $from, $to, @chunk) = @_;
  480.     my (@ctx, @rem, @add);
  481.  
  482.     # The class of the previous line.
  483.     my $prev_class = '';
  484.  
  485.     return unless @chunk;
  486.  
  487.     # incomplete last line might be among removed or added lines,
  488.     # or both, or among context lines: find which
  489.     for (my $i = 1; $i < @chunk; $i++) {
  490.         if ($chunk[$i][0] eq 'incomplete') {
  491.             $chunk[$i][0] = $chunk[$i-1][0];
  492.         }
  493.     }
  494.  
  495.     # guardian
  496.     push @chunk, ["", ""];
  497.  
  498.     my @result_chunks = ();
  499.     my $result_chunk = { header => undef, lines => []};
  500.     foreach my $line_info (@chunk) {
  501.         my ($class, $line) = @$line_info;
  502.  
  503.         # print chunk headers
  504.         if ($class && $class eq 'chunk_header') {
  505.             $result_chunk->{header} = $self->format_diff_line($line, $class, $from, $to);
  506.             next;
  507.         }
  508.  
  509.         ## print from accumulator when have some add/rem lines or end
  510.         # of chunk (flush context lines), or when have add and rem
  511.         # lines and new block is reached (otherwise add/rem lines could
  512.         # be reordered)
  513.         if (!$class || ((@rem || @add) && $class eq 'ctx') ||
  514.             (@rem && @add && $class ne $prev_class)) {
  515.             @{$result_chunk->{lines}} = $self->format_ctx_rem_add_lines(\@ctx, \@rem, \@add);
  516.             @ctx = @rem = @add = ();
  517.             push @result_chunks, $result_chunk;
  518.             $result_chunk = { header => undef, lines => []};
  519.         }
  520.  
  521.         ## adding lines to accumulator
  522.         # guardian value
  523.         last unless $line;
  524.         # rem, add or change
  525.         if ($class eq 'rem') {
  526.             push @rem, $line;
  527.         } elsif ($class eq 'add') {
  528.             push @add, $line;
  529.         }
  530.         # context line
  531.         if ($class eq 'ctx') {
  532.             push @ctx, $line;
  533.         }
  534.  
  535.         $prev_class = $class;
  536.     }
  537.     return \@result_chunks;
  538. }
  539.  
  540. # parse extended diff header line, before patch itself
  541. sub format_extended_diff_header_line
  542. {
  543.     my ($self, $line, $diffinfo, $from, $to) = @_;
  544.     # match <path>
  545.     $line .= $from->{file} if $line =~ s!^((copy|rename) from ).*$!$1! && $from->{href};
  546.     $line .= $to->{file} if $line =~ s!^((copy|rename) to ).*$!$1! && $to->{href};
  547.  
  548.     # match single <mode>
  549.     $line .= sprintf('<span class="info"> (%s)</span>', file_type_long($1)) if $line =~ m/\s(\d{6})$/;
  550.     # match <hash>
  551.     if ($line =~ m/^index [0-9a-fA-F]{40}..[0-9a-fA-F]{40}/) {
  552.         # can match only for ordinary diff
  553.         my ($from_link, $to_link);
  554.         if ($from->{href}) {
  555.             $from_link = substr($diffinfo->{from_id}, 0, 7);
  556.         } else {
  557.             $from_link = '0' x 7;
  558.         }
  559.         if ($to->{href}) {
  560.             $to_link = substr($diffinfo->{to_id}, 0, 7);
  561.         } else {
  562.             $to_link = '0' x 7;
  563.         }
  564.         my ($from_id, $to_id) = ($diffinfo->{from_id}, $diffinfo->{to_id});
  565.         $line =~ s!$from_id\.\.$to_id!$from_link..$to_link!;
  566.     }
  567.  
  568.     return $line;
  569. }
  570.  
  571. # format git diff header line, i.e. "diff --(git|combined|cc) ..."
  572. sub format_git_diff_header_line
  573. {
  574.     my ($self, $line, $diffinfo, $from, $to) = @_;
  575.  
  576.     $line =~ s!^(diff (.*?) )"?a/.*$!$1!;
  577.    $line .= 'a/' . escape_html($from->{file});
  578.    $line .= ' b/' . escape_html($to->{file});
  579.  
  580.    return $line;
  581. }
  582.  
  583. # parse line of git-diff-tree "raw" output
  584. sub parse_difftree_raw_line
  585. {
  586.    my ($self, $line) = @_;
  587.    my %res;
  588.  
  589.    # ':100644 100644 03b218260e99b78c6df0ed378e59ed9205ccc96d 3b93d5e7cc7f7dd4ebed13a5cc1a4ad976fc94d8 M   ls-files.c'
  590.    # ':100644 100644 7f9281985086971d3877aca27704f2aaf9c448ce bc190ebc71bbd923f2b728e505408f5e54bd073a M   rev-tree.c'
  591.    if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)([0-9]{0,3})\t(.*)$/) {
  592.        $res{from_mode} = $1;
  593.        $res{to_mode} = $2;
  594.        $res{from_id} = $3;
  595.        $res{to_id} = $4;
  596.        $res{status} = $5;
  597.        $res{similarity} = $6;
  598.        if ($res{status} eq 'R' || $res{status} eq 'C') { # renamed or copied
  599.            ($res{from_file}, $res{to_file}) = map { unquote($_) } split("\t", $7);
  600.        } else {
  601.            $res{from_file} = $res{to_file} = $res{file} = unquote($7);
  602.        }
  603.    }
  604.    # '::100755 100755 100755 60e79ca1b01bc8b057abe17ddab484699a7f5fdb 94067cc5f73388f33722d52ae02f44692bc07490 94067cc5f73388f33722d52ae02f44692bc07490 MR git-gui/git-gui.sh'
  605.    # combined diff (for merge commit)
  606.    elsif ($line =~ s/^(::+)((?:[0-7]{6} )+)((?:[0-9a-fA-F]{40} )+)([a-zA-Z]+)\t(.*)$//) {
  607.        $res{nparents}  = length($1);
  608.        $res{from_mode} = [ split(' ', $2) ];
  609.        $res{to_mode} = pop @{$res{from_mode}};
  610.        $res{from_id} = [ split(' ', $3) ];
  611.        $res{to_id} = pop @{$res{from_id}};
  612.        $res{status} = [ split('', $4) ];
  613.        $res{to_file} = unquote($5);
  614.    }
  615.    # 'c512b523472485aef4fff9e57b229d9d243c967f'
  616.    elsif ($line =~ m/^([0-9a-fA-F]{40})$/) {
  617.        $res{commit} = $1;
  618.    }
  619.  
  620.    return \%res;
  621. }
  622.  
  623. # generates _two_ hashes, references to which are passed as 2 and 3 argument
  624. sub parse_from_to_diffinfo
  625. {
  626.    my ($self, $diffinfo, $from, $to) = @_;
  627.  
  628.    # ordinary (not combined) diff
  629.    $from->{file} = $diffinfo->{from_file};
  630.    $from->{href} = $diffinfo->{status} ne "A";
  631.  
  632.    $to->{file} = $diffinfo->{to_file};
  633.    $to->{href} = !$self->is_deleted($diffinfo); # file exists in result
  634. }
  635.  
  636. # parse from-file/to-file diff header
  637. sub parse_diff_from_to_header
  638. {
  639.    my ($self, $from_line, $to_line, $diffinfo, $from, $to) = @_;
  640.    my $line;
  641.    my $result = '';
  642.  
  643.    $line = $from_line;
  644.    # no extra formatting for "^--- /dev/null"
  645.    if (!$diffinfo->{nparents}) {
  646.        # ordinary (single parent) diff
  647.        $from->{header} = '--- a/' . escape_html($from->{file}) if $line =~ m!^--- "?a/!;
  648.         # $result .= qq!<div class="diff from_file">$line</div>\n!;
  649.     }
  650.  
  651.     $line = $to_line;
  652.     # no extra formatting for "^+++ /dev/null"
  653.     if ($line =~ m!^\+\+\+ "?b/!) {
  654.        $to->{header} = '+++ b/' . escape_html($to->{file});
  655.    }
  656. }
  657.  
  658. sub parse_patches
  659. {
  660.    my ($self, $difftree, $lines, $hash, $hash_parent) = @_;
  661.  
  662.    my $patch_idx = 0;
  663.    my $patch_number = 0;
  664.    my $patch_line;
  665.    my $diffinfo;
  666.    my $to_name;
  667.    my @chunk; # for side-by-side diff
  668.    my @patches = ();
  669.  
  670.    # skip to first patch
  671.    while ($patch_line = shift @$lines) {
  672.        chomp $patch_line;
  673.  
  674.        last if ($patch_line =~ m/^diff /);
  675.    }
  676.  
  677.    my (%from, %to, %patch_desc);
  678. PATCH:
  679.    while ($patch_line) {
  680.        %from = ();
  681.        %to = ();
  682.        # parse "git diff" header line
  683.        if ($patch_line =~ m/^diff --git (\"(?:[^\\\"]*(?:\\.[^\\\"]*)*)\"|[^ "]*) (.*)$/) {
  684.             # $1 is from_name, which we do not use
  685.             $to_name = unquote($2);
  686.             $to_name =~ s!^b/!!;
  687.         } else {
  688.             $to_name = undef;
  689.         }
  690.  
  691.         # advance raw git-diff output if needed
  692.         $patch_idx++ if defined $diffinfo;
  693.  
  694.         # read and prepare patch information
  695.         $diffinfo = $difftree->[$patch_idx];
  696.  
  697.         # modifies %from, %to hashes
  698.         $self->parse_from_to_diffinfo($diffinfo, \%from, \%to);
  699.  
  700.         # this is first patch for raw difftree line with $patch_idx index
  701.         # we index @$difftree array from 0, but number patches from 1
  702.         $patch_desc{idx} = $patch_idx + 1;
  703.  
  704.         # git diff header
  705.         $patch_number++;
  706.         $patch_desc{header} = $self->format_git_diff_header_line($patch_line, $diffinfo, \%from, \%to);
  707.     EXTENDED_HEADER:
  708.         while ($patch_line = shift @$lines) {
  709.             chomp $patch_line;
  710.  
  711.             last EXTENDED_HEADER if ($patch_line =~ m/^--- |^diff /);
  712.             $patch_desc{extended_header} = $self->format_extended_diff_header_line($patch_line, $diffinfo, \%from, \%to);
  713.         }
  714.  
  715.         # from-file/to-file diff header
  716.         if (!$patch_line) {
  717.             die('error');
  718.             last PATCH;
  719.         }
  720.         next PATCH if $patch_line =~ m/^diff /;
  721.  
  722.         my $last_patch_line = $patch_line;
  723.         $patch_line = shift @$lines;
  724.         chomp $patch_line;
  725.  
  726.         $self->parse_diff_from_to_header($last_patch_line, $patch_line, $diffinfo, \%from, \%to);
  727.  
  728.         # the patch itself
  729.     LINE:
  730.         $patch_desc{chunks} = [];
  731.         while ($patch_line = shift @$lines) {
  732.             chomp $patch_line;
  733.  
  734.             next PATCH if ($patch_line =~ m/^diff /);
  735.             my $class = $self->diff_line_class($patch_line, \%from, \%to);
  736.  
  737.             if ($class eq 'chunk_header') {
  738.                 push @{$patch_desc{chunks}}, $self->format_diff_chunk(\%from, \%to, @chunk);
  739.                 @chunk = ();
  740.             }
  741.  
  742.             push @chunk, [ $class, $patch_line ];
  743.         }
  744.     } continue {
  745.         if (@chunk) {
  746.             push @{$patch_desc{chunks}}, $self->format_diff_chunk(\%from, \%to, @chunk);
  747.             @chunk = ();
  748.         }
  749.         push @patches, \%patch_desc if %patch_desc;
  750.         %patch_desc = ();
  751.     }
  752.     return \@patches;
  753. }
  754.  
  755. sub parse_commit_text
  756. {
  757.     my ($self, @commit_lines, $withparents) = @_;
  758.     my %co;
  759.     pop @commit_lines; # Remove '\0'
  760.  
  761.     @commit_lines or return;
  762.  
  763.     my $header = shift @commit_lines;
  764.     $header =~ m/^[0-9a-fA-F]{40}/ or return;
  765.     ($co{'id'}, my @parents) = split ' ', $header;
  766.  
  767.     while (my $line = shift @commit_lines) {
  768.         last if $line eq "\n";
  769.         if ($line =~ m/^tree ([0-9a-fA-F]{40})$/) {
  770.             $co{'tree'} = $1;
  771.         } elsif ((!defined $withparents) && ($line =~ m/^parent ([0-9a-fA-F]{40})$/)) {
  772.             push @parents, $1;
  773.         } else {
  774.             foreach my $who (qw(author committer)) {
  775.                 if ($line =~ m/^${who} (.*) ([0-9]+) (.*)$/) {
  776.                     $co{"${who}"} = Encode::decode_utf8($1);
  777.                     $co{"${who}_epoch"} = $2;
  778.                     $co{"${who}_tz"} = $3;
  779.                     $co{"${who}_date"} = parse_date($2, $3);
  780.                     $co{"${who}_formatted_ts"} = sprintf(
  781.                         '%s (%02d:%02d %s)',
  782.                         $co{"${who}_date"}->{'rfc2822'},
  783.                         $co{"${who}_date"}->{'hour_local'},
  784.                         $co{"${who}_date"}->{'minute_local'},
  785.                         $co{"${who}_date"}->{'tz_local'}
  786.                     );
  787.                     if ($co{"${who}"} =~ m/^([^<]+) <([^>]*)>/) {
  788.                         $co{"${who}_name"}  = escape_html($1);
  789.                         $co{"${who}_email"} = $2;
  790.                     } else {
  791.                         $co{"${who}_name"} = escape_html($co{"${who}"});
  792.                     }
  793.                 }
  794.             }
  795.         }
  796.     }
  797.     defined $co{'tree'} or return;
  798.  
  799.     $co{'parents'} = \@parents;
  800.     $co{'parent'} = $parents[0];
  801.  
  802.     foreach my $title (@commit_lines) {
  803.         $title =~ s/^    //;
  804.         if ($title ne "") {
  805.             $co{'title'} = escape_html(chop_str($title, 80, 5));
  806.             $co{'title_short'} = escape_html(chop_str($title, 50, 5));
  807.             last;
  808.         }
  809.     }
  810.     if (! defined $co{'title'} || $co{'title'} eq "") {
  811.         $co{'title'} = $co{'title_short'} = '(no commit message)';
  812.     }
  813.     # remove added spaces
  814.     foreach my $line (@commit_lines) {
  815.         $line =~ s/^    //;
  816.     }
  817.     $co{'comment'} = \@commit_lines;
  818.  
  819.     return %co;
  820. }
  821.  
  822. sub commif_diff
  823. {
  824.     my ($self, %co) = @_;
  825.     @{$co{'parents'}} == 1 or die('Too much parents');
  826.     # my $hash_parent_param = @{$co{'parents'}} > 1 ? '--cc' : $co{'parent'} || '--root';
  827.     my $hash_parent_param = $co{parent};
  828.     my @lines = $self->git("diff-tree -r -M --no-commit-id --patch-with-raw --full-index $hash_parent_param ${co{id}}");
  829.     my @difftree;
  830.     while (scalar @lines) {
  831.         my $line = shift @lines;
  832.         chomp $line;
  833.         # empty line ends raw part of diff-tree output
  834.         last unless $line;
  835.         push @difftree, $self->parse_difftree_raw_line($line);
  836.     }
  837.     my $patches = $self->parse_patches(\@difftree, \@lines, $co{id}, $hash_parent_param);
  838.     return (difftree => $self->format_difftree(@difftree), patches => $patches);
  839. }
  840.  
  841.  
  842. sub commit_info
  843. {
  844.     my ($self, $sha) = @_;
  845.     my %co = $self->parse_commit_text($self->git("rev-list --header --max-count=1 $sha"), 1);
  846.     #my ($difftree, $patches) = $self->commif_diff(%co);
  847.     #return { info => \%co, difftree => $difftree, patches => $patches };
  848.     return { info => \%co, $self->commif_diff(%co) };
  849. }
  850.  
  851. sub extract_zip
  852. {
  853.     my ($path, $zip_name) = @_;
  854.     my $zip = Archive::Zip->new();
  855.     $zip->read($zip_name) == AZ_OK or die "open zip '$zip_name' failed!\n";
  856.     $zip->extractTree('', "$path/") == AZ_OK or die "can't extract '$zip_name' to $path\n";
  857. }
  858.  
  859. sub new
  860. {
  861.     my ($class, %opts) = @_;
  862.     $opts{dir} //= '';
  863.     $opts{git_dir} //= "$opts{dir}.git";
  864.     return bless \%opts => $class;
  865. }
  866.  
  867. sub set_repo
  868. {
  869.     my ($self, $dir) = @_;
  870.     $self->{dir} = $dir;
  871.     $self->{git_dir} = "$dir.git";
  872.     return $self;
  873. }
  874.  
  875. sub git
  876. {
  877.     my ($self, $git_tail) = @_;
  878.     my @lines = `git --git-dir=$self->{git_dir} --work-tree=$self->{dir} $git_tail`;  #Apache sub procces
  879.     $self->{logger}->note(join '', @lines) if exists $self->{logger};
  880.     return @lines;
  881. }
  882.  
  883. sub log
  884. {
  885.     my ($self, %opts) = @_;
  886.     my $sha = $opts{sha} // '';
  887.     my $s = Encode::decode_utf8(join '', $self->git("log -z --format=format:'%H||%h||%an||%ae||%at||%ct||%B' $sha"));
  888.     my @out = ();
  889.     foreach my $log (split "\0", $s) {
  890.         my ($sha, $abrev_sha, $author, $email, $adate, $cdate, $message) = split /\|\|/, $log;
  891.         push @out, {
  892.             sha => $sha,
  893.             abbreviated_sha => $abrev_sha,
  894.             message => $message,
  895.             author => $author,
  896.             author_email => $email,
  897.             author_date => strftime('%d.%m.%Y %H:%M', gmtime($adate)), #to do: figure out about locale
  898.             committer_date => strftime('%d.%m.%Y %H:%M', gmtime($cdate))
  899.         };
  900.     }
  901.     return \@out;
  902. }
  903.  
  904. sub new_repo
  905. {
  906.     my ($self, $problem, %opts) = @_;
  907.     mkdir $self->{dir} or die "Unable to create repo dir: $!";
  908.     if (exists $opts{from}) {
  909.         $self->move_history(%opts);
  910.     }
  911.     else {
  912.         $self->git('init');
  913.     }
  914.     $self->add($problem, message => (exists $opts{from} ? 'Update task' : 'Initial commit'));
  915.     return $self;
  916. }
  917.  
  918. sub delete
  919. {
  920.     my ($self) = @_;
  921.     die "Git repository doesn't exist" unless -d $self->{dir};
  922.     rmtree($self->{dir});
  923. }
  924.  
  925. sub init
  926. {
  927.     my ($self, $problem, %opts) = @_;
  928.     mkdir $self->{dir} or die "Unable to create repo dir: $!";
  929.     $self->git('init');
  930.     $self->add($problem, message => 'Initial commit');
  931.     return $self;
  932. }
  933.  
  934. sub add
  935. {
  936.     my ($self, $problem, %opts) = @_;
  937.     my $tmpdir = tempdir($tmp_template, TMPDIR => 1, CLEANUP => 1);
  938.     extract_zip($tmpdir, $problem->{zip});
  939.     $self->git('rm . -r --ignore-unmatch');
  940.     dircopy($tmpdir, $self->{dir});
  941.     $self->commit(exists $opts{message} ? $opts{message} : 'Update task', parse_author($problem->{author}));
  942.     return $self;
  943. }
  944.  
  945. sub move_history
  946. {
  947.     my ($self, %opts) = @_;
  948.     mkdir $self->{dir} unless -d $self->{dir};
  949.     dircopy($opts{from}, $self->{dir}) or die "Can't copy dir: $!";
  950.     $self->git("reset --hard $opts{sha}");
  951.     return $self;
  952. }
  953.  
  954. sub commit
  955. {
  956.     my ($self, $message, $author) = @_;
  957.     my ($git_author, $git_author_email) = get_git_author_info($author);
  958.     $self->git("config user.name '$git_author'");
  959.     $self->git("config user.email '$git_author_email'");
  960.     $self->git('add -A');
  961.     $self->git(qq~commit --message="$message"~);
  962.     $self->git('gc');
  963.     return $self;
  964. }
  965.  
  966. 1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement