Practice of Programming

プログラム とか Linuxとかの話題

JSON, YAML & utf8 flag

-------------------------------------------
JSON
-------------------------------------------
SUBROUTINE            INPUT OUTPUT ENCODE

to_json(JSON)          byte   flag latin1 
to_json(JSON)          flag   flag   utf8 
from_json(JSON)        byte   flag latin1 
from_json(JSON)        flag   flag   utf8 

encode_json(JSON)      flag   byte 
encode_json(JSON::XS)  flag   byte 
decode_json(JSON)      byte   flag   utf8
decode_json(JSON::XS)  byte   flag   utf8

decode(JSON OO)        byte   flag   utf8 
decode(JSON OO)        flag   flag   utf8 
-------------------------------------------
YAML
-------------------------------------------
SUBROUTINE            INPUT OUTPUT ENCODE

Load(YAML)             byte   byte 
Load(YAML)             flag   flag   utf8
Load(YAML::XS)         byte   flag   utf8

Dump(YAML)             byte   byte 
Dump(YAML)             flag   flag   utf8
Dump(YAML::XS)         flag   byte 

JSON & JSON::XS has same interface.
JSON & YAML::XS has same input/output pattern, but YAML is different.

JSON OO's code:

JSON->new->utf8(!utf8::is_utf8($json_text))->decode($json_text);

For encoding, utf8 method is only for output.
$perl_data must be turned utf8 flag on.

JSON->new->utf8(1)->encode($perl_data); # utf8 encoded scalar
JSON->new->utf8(0)->encode($perl_data); # utf8 flag scalar

To get the above table, I used the following code.

#!/usr/bin/perl

use feature qw/say/;
use strict;
use warnings;
use Encode;
use JSON ();
use JSON::XS ();
use YAML ();
use YAML::XS ();

sub json {
  return {
          input => {
                    flag => {
                             json => do {use utf8; q{{"hoge" : "あ"}}},
                             data => do {use utf8; {"hoge" => "あ"}},
                            },
                    byte => {
                             json => do {no utf8; q{{"hoge" : "あ"}}},
                             data => do {no utf8; {"hoge" => "あ"}},
                            },
                   },
          code => {
                   json => {
                            'from_json(JSON)'       => sub {my $r = eval {JSON::from_json(shift)};   $r ? (1, $r) : (0, $@)},
                            'decode_json(JSON)'     => sub {my $r = eval {JSON::decode_json(shift)}; $r ? (1, $r) : (0, $@)},
                            'decode_json(JSON::XS)' => sub {my $r = eval {JSON::XS::decode_json(shift)}; $r ? (1, $r) : (0, $@)},
                            'decode(JSON OO)'       => sub {my $d = shift; my $r = JSON->new->utf8(!utf8::is_utf8($d))->decode($d); (1, $r)},
                           },
                   data  => {
                             'to_json(JSON)'        => sub {my $r = eval {JSON::to_json(shift)};     $r ? (1, $r) : (0, $@)},
                             'encode_json(JSON)'    => sub {my $r = eval {JSON::encode_json(shift)}; $r ? (1, $r) : (0, $@)},
                             'encode_json(JSON::XS)' => sub {my $r = eval {JSON::XS::encode_json(shift)}; $r ? (1, $r) : (0, $@)},
                            }

                  }
         };
}

sub yaml {
  return {
          input => {
                    flag => {
                             yaml => do {use utf8; qq{---\nhoge : あ\n}},
                             data => do {use utf8; {"hoge" => "あ"}},
                            },
                    byte => {
                             yaml => do {no utf8; qq{---\nhoge : あ\n}},
                             data => do {no utf8; {"hoge" => "あ"}},
                            },
                   },
          code => {
                   yaml => {
                            'Load(YAML)'     => sub {my $r = eval {YAML::Load(shift)};     $r ? (1, $r) : (0, $@)},
                            'Load(YAML::XS)' => sub {my $r = eval {YAML::XS::Load(shift)}; $r ? (1, $r) : (0, $@)},
                           },
                   data  => {
                             'Dump(YAML)'        => sub {my $r = eval {YAML::Dump(shift)};     $r ? (1, $r) : (0, $@)},
                             'Dump(YAML::XS)'    => sub {my $r = eval {YAML::XS::Dump(shift)}; $r ? (1, $r) : (0, $@)},
                            }
                  }
         };
}


execute("JSON", \&json);
execute("YAML", \&yaml);

sub execute {
  my ($label, $type) = @_;
  my %input = %{$type->()->{input}};
  my %sub   = %{$type->()->{code}};
  say "--------------------------------------------------------------------------------------";
  say "$label";
  say "--------------------------------------------------------------------------------------";
  my %result;

  foreach my $in (keys %input) {
    foreach my $format (keys %{$input{$in}}) {
      foreach my $sub (keys %{$sub{$format}}) {
        my $code = $sub{$format}->{$sub};
        my ($success, $content) = $code->($input{$in}->{$format});
        if ($success) {
          my $c;
          my $flg = utf8::is_utf8($format eq 'data' ? ($c = $content) : ($c = $content->{hoge}));
          $c ||= '';
          $flg ||= 0;
          my $encode = '';
          $encode = (grep {ord($_) >= 0x100} ($c =~m{(.)}g)) ? 'utf8' : 'latin1'  if $flg;
          $result{$format}->{$sub}->{$in} = {success => 1, flag => ($flg ? 'flag' : 'byte'), encode => $encode, content => ($flg ? Encode::encode($encode, $c) : $c)};
        } else {
          $content ||= '';
          $content =~s{at /home/.+$}{};
          $result{$format}->{$sub}->{$in} = {success => 0, error => $content};
        }
      }
    }
  }

  my $line_format = "%-21s %5s %2s %6s %-20s %s\n";
  printf $line_format, 'SUBROUTINE', 'INPUT', 'OK', 'OUTPUT', 'CONTENT', 'ERROR';
  say "";
  foreach my $format (keys %result) {
    foreach my $sub (sort keys %{$result{$format}}) {
      foreach my $input (keys %{$result{$format}{$sub}}) {
        $result{$format}{$sub}{$input}->{content} =~s{\n}{ }g if $result{$format}{$sub}{$input}->{content};
        $result{$format}{$sub}{$input}->{error}   =~s{\n}{ }g if $result{$format}{$sub}{$input}->{error};
        printf $line_format, $sub, $input, map {$_ // ''} @{$result{$format}{$sub}{$input}}{qw/success flag content error/};
      }
    }
  }
}