--- tv_grab_fi_orig 2010-11-29 21:06:28.730006915 +0200 +++ tv_grab_fi 2010-11-29 21:45:53.490007056 +0200 @@ -235,7 +235,7 @@ die "cannot write to $opt_output: $!" if not defined $fh; $w_args{OUTPUT} = $fh; } -$w_args{encoding} = 'ISO-8859-1'; +$w_args{encoding} = 'UTF-8'; my $writer = new XMLTV::Writer(%w_args); $writer->start($HEAD); @@ -335,27 +335,6 @@ } } -my $warned_bad_chars; -sub tidy( $ ) { - my($string) = @_; - $string = decode_utf8($string); - - # Make contents ISO-8859-1 compatible - # TAB => space - # U+2013 (EN DASH) => - - # U+2019 (RIGHT SINGLE QUOTATION MARK) => ' - # U+201D (RIGHT DOUBLE QUOTATION MARK) => " - $string =~ tr/\t\N{U+2013}\N{U+2019}\N{U+201D}/ \-\'\"/; - - # Warn about incomplete replacement - if ($string =~ s/([^\N{U+0000}-\N{U+00FF}])//g) { - warn "removing bad character: 0x" . sprintf("%04x", ord($1)) - unless ($warned_bad_chars++ or $opt_quiet); - } - - return $string; -} - #### # process_table: fetch a URL and process it # @@ -371,7 +350,7 @@ my $today = UnixDate($date, '%Y%m%d'); my $url = "$SITE/channel/list/$ch_their_id/$today"; t "getting URL: $url"; - my $tree = get_nice_tree $url, \&tidy; + my $tree = get_nice_tree $url; local $SIG{__WARN__} = sub { warn "$url: $_[0]"; }; @@ -553,7 +532,7 @@ my $url="$SITE/channel"; t "getting URL: $url"; - my $tree = get_nice_tree($url, \&decode_utf8); + my $tree = get_nice_tree($url); # FIXME commonize this local $SIG{__WARN__} = sub {