#!/usr/bin/perl
package Win32::Shortkeys::Ripper;
use strict;
use warnings;
use Config::YAML::Tiny;
#use IO::File;
use Encode;
#use locale;
use XML::Parser;
use Data::Dumper;
#use File::Copy;
#use Getopt::Long;
#use File::BOM qw( :all );
my $shk_path = "";
my $shkold_path = "";
my $tmp_path = "";
#GetOptions('fpath=s'=> \$shk_path, 'foldpath=s' => \$shkold_path, 'tmpath=s'=> \$tmp_path);
my $erasesignal;
my $comchain;
my $TF=undef;
my $keepsit;
my $afterstarttag;
my $beforestarttag;
=head1 SYNOPSIS
Win32::Shortkeys::Ripper::catch(
shk_file => "shortkey_utf8.xml",
tmp_file => "mytmp.txt",
shk_old => "shortkeys_utf8.xml_old.txt",
encoding => "UTF-8",
properties => "ripper.properties"
);
Make a backup of shortkeys_utf8.xml in shortkeys_utf8.xml_old.txt and remove the content of the data elememts in shortkeys_utf8.xml.
Other parameters are given in a properties file described below.
Default values
=over
=item *
for encoding : UTF-8
=item *
for tmp_file : tmp.txt
=item *
for properties file name : ripper.properties
=back
=cut
sub catch {
my %defaults = (tmp_file => "tmp.txt", encoding=> "UTF-8", properties=> "ripper.properties");
my %args = (%defaults, @_);
my $config = Config::YAML::Tiny->new( config => $args{properties} );
$keepsit = $config->get_tag_to_keep;
$afterstarttag = $config->get_after_start_tag;
$beforestarttag = $config->get_before_start_tag;
#die Dumper $keepsit;
#open( my $FH, "<:raw:utf8", $args{shk_file} )
open( my $FH, "<:encoding($args{encoding})", $args{shk_file} )
or die "Unable to open xml file: $!";
# http://blogs.msdn.com/brettsh/archive/2006/06/07/620986.aspx
#open (my $TF, ">:raw:encoding(UTF16-LE):crlf:utf8", $tmp_path) or die "Unable to make new temporary file: $!";
#with utf-8 this would suffice open( $TF, ">:encoding($args{encoding})", $args{tmp_file} )
# http://grokbase.com/t/perl/unicode/111hf8z9as/encoding-utf16-le-on-windows
# Files opened on Windows already have the :crlf layer pushed by default,
#so you somehow need to get the :encoding layer *below* it. If you have it on top,
#then the crlf substitution happens *after* the encoding, leading to incorrect data.
#this open( $TF, ">:encoding($args{encoding})", $args{tmp_file} ) or die "Unable to make new temporary file: $!";
#insert "blank" between each character, I suspect this comes from the explanation above
open( $TF, ">:raw:encoding($args{encoding}):crlf", $args{tmp_file} ) or die "Unable to make new temporary file: $!";
print $TF "\N{BOM}" if ($args{encoding} eq "UTF-8" || $args{encoding} eq "UTF-16BE");
#commentaire vide : <!-----> trois lignes sucessives font que les données ne sont plus
#produites. Le prochain commentaire non vide remet le compteur à zéro
$erasesignal = 2;
$comchain = 0;
# ProtocolEncoding => $args{encoding}, #with this accetended char in shortkey's name are preserved
my $p = XML::Parser->new(
ErrorContext => 2,
ProtocolEncoding => $args{encoding}
);
$p->setHandlers(
'Start' => \&Win32::Shortkeys::Ripper::MySubs::start,
'Char' => \&Win32::Shortkeys::Ripper::MySubs::char,
'End' => \&Win32::Shortkeys::Ripper::MySubs::end,
'Comment' => \&Win32::Shortkeys::Ripper::MySubs::com,
'Default' => \&Win32::Shortkeys::Ripper::MySubs::def,
);
$p->parse($FH);
close $FH;
close $TF;
#rename ("../../shortkeys.xml", "../../shortkeys.xml_old.txt") or die "can't rename $!";
#rename ("../../tmp.txt", "../../shortkeys.xml") or die "can't rename $!";
rename( $args{shk_file}, $args{old_file} ) or die "can't copy $!";
rename( $args{tmp_file}, $args{shk_file} ) or die "can't rename $!";
}
package Win32::Shortkeys::Ripper::MySubs;
#use Data::Dumper;
my $current;
sub start {
my ( $p, $el, %atts ) = @_;
my $k = $atts{k};
$comchain = 0 unless $comchain > 2;
my $rtl = get_rtl( $k, $beforestarttag->{$el} );
if ( $el eq "dataref" ) { $k = "dataref"; }
if ( $k && $keepsit->{$k} ) {
$current = $k;
}
#print "comments : $comchain ";
return if $comchain > $erasesignal;
# print "rtl : " . ($rtl eq "" ? " vide" : " retour ligne") . " el: $el\n";
print $TF $rtl . "<" . $el;
my $at;
foreach my $v ( keys %atts ) {
$at .= " " . $v . "= '" . $atts{$v} . "'";
}
print $TF $at if ($at);
#print $TF ">$end", "$afterstarttag{$el}";
print $TF ">" . get_rtl( $atts{k}, $afterstarttag->{$el} );
}
sub def {
my ( $p, $el ) = @_;
print $TF "$el";
}
sub end {
my ( $p, $el ) = @_;
return if $comchain > $erasesignal;
if ( $current && $current ne "dataref" ) {
$current = undef;
}
print $TF "</" . $el . ">";
}
sub char {
my ( $p, $s ) = @_;
return if $comchain > $erasesignal;
if ( $current && $keepsit->{$current} ) {
print $TF "$s";
}
}
sub com {
my ( $p, $s ) = @_;
if ( $s eq "" ) {
$comchain++;
}
else {
$comchain = 0;
}
print $TF "\n<!--" . $s . "-->";
}
sub get_rtl {
my ( $at, $mhref ) = @_;
my $rtl = "";
# print "el: $at ";
if ( $mhref && ref($mhref) eq "HASH" ) {
#my $href = $beforestarttag{$el};
#print "get_rtl: " . $href->{$at}. "\n" ;
$rtl = $mhref->{$at} if ( $at && exists ${$mhref}{$at} );
}
else {
$rtl = $mhref if ($mhref);
}
return $rtl;
}
=head1 DESCRIPTION
=head2 Properties file
It's name default to C<ripper.properties>. Use the C<properties => $file_name> to change that.
The file must follow YANL::Tiny::Simple syntax and contain three elements C<before_start_tag after_start_tag tag_to_keep>.
after_start_tag:
data:
a: "\n\n"
d: "\n"
t: "\n\n"
....
dataref: ''
shortkey: ''
before_start_tag:
data:
a: "\n"
d: "\n"
t: "\n"
....
dataref: ''
shortkey: ''
tag_to_keep:
a: 0
d: 0
dataref: 1
f: 1
hg: 1
hj: 1
j: 0
k: 0
l: 1
y: 0
z: 1
See the files in the example folder: ripper.properties is used clear shorkeys_utf8.xml.
=over
=item * before_start_tag after_start_tag
contains the sub elements C<data dataref shortkey>. The data element enumerates, using the k attribute, the element's start tag that are preceded (C<before_start_tag> or followed C<after_start_tag> by line break(s).
=item * tag_to_keep
Enumerate the data element using the value of the k attribute that you want to clear C<attribute:0> or keep unchanged C<attribute:1>. C<dataref> is also listed.
=back
=head1 SUPPORT
Any questions or problems can be posted to me (rappazf) on my gmail account.
The current state of the source can be extract using Mercurial from
L<http://sourceforge.net/projects/win32-shortkeys/>.
=head1 AUTHOR
FranE<ccedil>ois Rappaz <rappazf@gmail.com>
=head1 COPYRIGHT
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut
1;