#!/usr/bin/env perl
#
# bookmarks - Export browser bookmarks as plain text.
#
# bookmarks is a tool to export bookmarks from files supplied as arguments, or
# from browsers default locations (when called without arguments). The following
# sources are supported :
#
# - Safari (.plist)
# - Firefox (.sqlite)
# - Chrome and Edge (Bookmarks)
# - Internet Explorer (Favorites)
# - Markdown (.md)
# - Gemini (.gmi)
# - Surfraw (same as plain text)
# - Plain text (any other extension)
#
# Default export format : <title> <url> <description>
#
# - <title> is your bookmark's name, alias, or webpage title.
# - <url> is your bookmark's address, URL or URI.
# - <description> is empty for Chrome, Edge, Internet Explorer or Gemini. It 
#   contains Safari 'Description', Firefox 'Tags' and what the Markdown spec
#   calls the 'Title' (just the tooltip, actually).
# 
# Markdown, Gemini and plain text files are processed line by line (as UTF-8) :
#
#   [markdown example](http://example.md/ "with description")
#   => gemini://example.gmi gemini example
#   plain text example http://example.txt with description
#
# Implementation Notes :
#
# - For Safari, this tool relies on dirty plist parsing using Apple's 'plutil'
#   command. It should use Mac::PropertyList instead. For performance reasons
#   when used interactively, I've decided to keep it like that (see README.md).
# - Since Firefox sets an EXCLUSIVE SQLite lock, a tmp DB file copy is used.
# - sources are read as UTF-8.
#
# 2021.03.30 v0.31 jul : skip gemini.t if URI::Find not installed
# 2021.03.29 v0.30 jul : added great markdown regex by Michaël Perrin
#                        process files without extension as plain text
#                        fixed extra spaces printed after empty fields
#                        added gemini support
# 2021.02.17 v0.28 jul : fixed missing require DBD::SQLite (firefox)
# 2021.02.16 v0.27 jul : added win32 Edge support, fixed Chrome bug (localappdata), fixed win32 bug (find), more tests (firefox)
# 2021.01.31 v0.26 jul : improved handling of optional dependencies
# 2021.01.31 v0.25 jul : skip bookmarks.t if URI::Find not installed
# 2021.01.31 v0.24 jul : fixed missing prereq URI::Find in Makefile.PL
# 2021.01.23 v0.23 jul : added tests and fixed firefox default location
# 2021.01.20 v0.22 jul : fixed firefox query again
# 2020.06.29 v0.21 jul : added markdown and text files support
# 2020.03.10 v0.20 jul : fixed EXCLUSIVE SQLite lock set by Firefox
# 2019.12.22 v0.19 jul : added chrome support, require, bugfixes
# 2019.10.22 v0.17 jul : created module
# 2019.10.17 v0.16 jul : fixed firefox tags
# 2019.09.27 v0.15 jul : added internet explorer support, fixed firefox tags
# 2019.08.13 v0.14 jul : added firefox support, output format
# 2019.07.11 v0.13 jul : use 5.010, better doc
# 2019.01.14 v0.12 jul : fixed case sensitive regex
# 2018.09.21 v0.11 jul : added arg and -a
# 2018.09.01 v0.10 jul : created

use 5.010;
use strict;
use warnings;
use utf8;
use Getopt::Std;
use File::Basename;
use File::Temp qw(tempdir);
use File::Copy qw(copy);

our $VERSION = '0.30';
my $program  = basename($0);
my $usage    = <<EOF;

Usage: $program [-hVdas] [-f format] [file ...]

    -h, --help      help
    -V, --version   version
    -d              debug
    -a              all files : process arguments and default locations
    -f format       any combination of letters t,u,d as title/url/description 
                    (default : tud)
    -s              find schemeless URLs in text files (default : no)
EOF

# options

my %options = ();
getopts("hVdaf:s", \%options) or die $usage;

my $help        = $options{h} // 0;
my $version     = $options{V} // 0;
my $debug       = $options{d} // 0;
my $all         = $options{a} // 0;
my $format      = $options{f} // "tud";
my $schemeless  = $options{s} // 0;

die $usage if $help;
die $VERSION . "\n" if $version;

# option -a
if (!@ARGV or $all)
{
    if ($^O eq "darwin")
    {    
        push @ARGV, glob('~/Library/Safari/Bookmarks.plist');
        push @ARGV, glob('~/Library/Application\ Support/Firefox/Profiles/*.default*/places.sqlite');
        push @ARGV, glob('~/Library/Application\ Support/Google/Chrome/Default/Bookmarks');
    }
    elsif ($^O eq "linux")
    {
        push @ARGV, glob('~/.mozilla/firefox/*.default/places.sqlite');
        push @ARGV, glob('~/.config/google-chrome/Default/Bookmarks');
    }
    elsif ($^O eq "MSWin32")
    {
        push @ARGV, $ENV{APPDATA} . '\Mozilla\Firefox\Profiles\*.default\places.sqlite';
        push @ARGV, $ENV{LOCALAPPDATA} . '\Google\Chrome\User Data\Default\Bookmarks';
        push @ARGV, $ENV{USERPROFILE} . '\Favorites';
        push @ARGV, $ENV{LOCALAPPDATA} . '\Microsoft\Edge\User Data\Default\Bookmarks';
    }
    else
    {
        die "unknown os, unable to set default files";
    }
    
    # do they indeed exist?
    @ARGV = grep { -e $_ } @ARGV;
}

# option -f
my %dispatch = (
't'     => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($t);       },
'tu'    => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($t,$u);    },
'tud'   => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($t,$u,$d); },
'tdu'   => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($t,$d,$u); },
'td'    => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($t,$d);    },
'u'     => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($u);       },
'ud'    => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($u,$d);    },
'udt'   => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($u,$d,$t); },
'utd'   => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($u,$t,$d); },
'ut'    => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($u,$t);    },
'd'     => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($d);       },
'du'    => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($d,$u);    },
'dut'   => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($d,$u,$t); },
'dtu'   => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($d,$t,$u); },
'dt'    => sub { my ($t,$u,$d) = @_; say join ' ', grep {defined $_ and length $_} ($d,$t);    },
);

# printer function
my $print_bookmark;

if ($dispatch{$format})
{
    $print_bookmark = $dispatch{$format};
}
else
{
    die "unknown format";
}


###############
# SUBROUTINES #
###############

sub _safari {

    my $plist = shift // "";
    warn "\$plist: $plist\n" if $debug;

    # validate plutil and plist
    my $res = `plutil $plist`;
    die "plutil failed : $res" if $res !~ /OK$/;

    # read plist as text
    my $text = `plutil -p $plist`;
    die "plutil failed on file $plist" if not $text;
    
    # split on ddd => {}
    my @pieces = split /\d+ => \{(.*?)\}\s+\d+ => \{/s, $text;
    
    # find bookmarks among pieces
    my @bookmarks = grep /URLString/, @pieces;
    
    # print
    foreach my $bm (@bookmarks)
    {
        my $title       = $1 if $bm =~ /"title" => "(.+)"/i;
        my $url         = $1 if $bm =~ /"URLString" => "(.+)"/i;
        my $description = $1 if $bm =~ /"PreviewText" => "(.+)"/i;
        
        $print_bookmark->($title, $url, $description);
    }
}

sub _firefox {

    eval {
        require DBI;
        require DBD::SQLite;
    };
    if ($@)
    {
        die "to process Firefox bookmarks, you need to install modules DBI DBD::SQLite\n";
    }

    my $dbfile = shift // "";
    warn "\$dbfile: $dbfile\n" if $debug;

    # fix EXCLUSIVE SQLite lock set by Firefox
    my $dir = tempdir( CLEANUP => 1 );
    copy $dbfile, $dir or die "unable to copy file : $dbfile";
    
    # from now on, we use a tmp db copy
    $dbfile = "$dir/" . basename($dbfile);
  
    my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", # DSN: dbi, driver, database file
                           "",                          # no user
                           "",                          # no password
                           { RaiseError => 1, PrintError => 0, AutoCommit => 0 },  # RaiseError=die() PrintError=warn()
                           ) or die DBI->errstr;

    # build sql statement
    my $sql =  "
    select b.title, p.url, t.tags as description
    from moz_bookmarks b
    left join 
    (
        select fk, group_concat(tag, ' ') as tags
        from
        (
            select distinct
            b1.fk as fk,
            b3.title as tag
            from moz_bookmarks b1
            left join moz_bookmarks b2 on b2.fk = b1.fk and b2.type = 1 and b2.title is null
            left join moz_bookmarks b3 on b3.id = b2.parent 
            where b1.type = 1 and b1.title is not null
        )
        group by fk
    ) t on t.fk = b.fk
    left join moz_places p on p.id = b.fk
    left join moz_origins o on o.id = p.origin_id
    where b.title is not null
    and o.prefix != 'place:'
    ";
    warn "\$sql: $sql\n" if $debug;

    # prepare and execute transaction
    eval
    {
        my $sth = $dbh->prepare($sql);
        $sth->execute();

        if ($sth)
        {
            while ($sth and my $hashref = $sth->fetchrow_hashref)
            {
                $print_bookmark->( $hashref->{'title'}, $hashref->{'url'}, $hashref->{'description'} );
            }
        }     
    };
    
    if ($@)
    {
        warn "transaction failed : $@";
        die  "unable to process file : $dbfile";
    }

    $dbh->disconnect;
}

sub _chrome {

    eval {
        require File::Slurper;
        require JSON;
    };
    if ($@)
    {
        die "to process Chrome bookmarks, you need to install modules File::Slurper JSON\n";
    }

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # read plist as text
    my $text = File::Slurper::read_binary($file);
    
    # decode utf8 json
    my $hashref = JSON::decode_json($text);
    
    # print
    foreach my $place ( ('bookmark_bar','other') )
    {
        my $arrayref = $hashref->{'roots'}->{$place}->{'children'};
    
        foreach my $i (@$arrayref)
        {
            $print_bookmark->( $i->{'name'}, $i->{'url'});
        }
    }
}

sub _iexplorer {
    
    eval {
        require Config::Any;
        require Config::Tiny;   # for Config::Any::INI
        require Win32;          # for windows rubbish
        require File::Find;
    };
    if ($@)
    {
        die "to process Internet Explorer favorites, you need to install modules Config::Any Config::Tiny Win32 File::Find\n";
    }
    
    my $favorites = shift // "";
    warn "\$favorites: $favorites\n" if $debug;

    # search in favorites and subfolders
    my @files;
    File::Find::find( { wanted => sub { push @files, $_ }, no_chdir => 1 }, $favorites );

    foreach my $file (@files)
    {
        my @filepaths = ($file);

        # force load internet shortcuts .url as INI files
        my @plugins = ('Config::Any::INI');
        my $cfg = Config::Any->load_files( {files => \@filepaths, force_plugins => \@plugins} );

        # workaround because system encoding != console encoding
        my $win32_old_cp = Win32::GetConsoleOutputCP();
        my $win32_new_cp = Win32::GetACP();
        
        # change console codepage
        Win32::SetConsoleOutputCP($win32_new_cp);

        for (@$cfg)
        {
            my ($filename, $config) = %$_;

            my $title   = substr(basename($filename), 0, -4); # chop ".url"
            my $url     = $config->{'InternetShortcut'}->{'URL'} // "";

            $print_bookmark->($title, $url);
        }

        # restore console codepage
        Win32::SetConsoleOutputCP($win32_old_cp);
    }
}

sub _txt {

    eval {
        require URI::Find;
        require URI::Find::Schemeless;
    };
    if ($@)
    {
        die "to process text files, you need to install module URI::Find\n";
    }

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # get uri regex
    my $class = $schemeless ? "URI::Find::Schemeless" : "URI::Find";    # copy-paste from urifind
    my $finder = $class->new( sub {
        my ($uri, $orig_uri) = @_;
        return $orig_uri;
    });

    my $regex = $finder->uri_re;
    $regex = $regex . '|' . $finder->schemeless_uri_re if $schemeless;

    # read file as text
    open(my $fh, '<:encoding(UTF-8)', $file) or die "unable to read file : $file";
 
    while (my $line = <$fh>)
    {
        if ( $line =~ /(.*)\s+($regex)\s+(.*)/ )    # match <title> <url> <description>
        {
            my $title       = $1;
            my $url         = $2;
            my $description = $3;

            $print_bookmark->($title, $url, $description);
        }   
    }
}

sub _gemini {

    eval {
        require URI::Find;
        require URI::Find::Schemeless;
    };
    if ($@)
    {
        die "to process Gemini files, you need to install module URI::Find\n";
    }

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # get uri regex
    my $class = $schemeless ? "URI::Find::Schemeless" : "URI::Find";    # copy-paste from urifind
    my $finder = $class->new( sub {
        my ($uri, $orig_uri) = @_;
        return $orig_uri;
    });

    my $regex = $finder->uri_re;
    $regex = $regex . '|' . $finder->schemeless_uri_re if $schemeless;

    # read file as text
    open(my $fh, '<:encoding(UTF-8)', $file) or die "unable to read file : $file";
 
    while (my $line = <$fh>)
    {
        if ( $line =~ /^=>\s+($regex)\s+(.*)/ )    # match => <url> <title>
        {
            my $title       = $2;
			my $url         = $1;
            my $description = undef;

            $print_bookmark->($title, $url, $description);
        }   
    }
}

sub _md {

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # read file as text
    open(my $fh, '<:encoding(UTF-8)', $file) or die "unable to read file : $file";

    while (my $line = <$fh>)
    {
		# regex by Michaël Perrin :
		# http://blog.michaelperrin.fr/2019/02/04/advanced-regular-expressions/
        if ( $line =~					
		/								
		(?<text_group>                  # Text group, including square brackets
		  \[
		    (?>                         # (?> defines an atomic group, this is a performance improvement when using recursion
		      [^\[\]]+                  # Look for any char except closing square bracket
		      |(?&text_group)           # OR: find recursively an other pattern with opening and closing square brackets
		    )*
		  \]
		)
		(?:
		  \(
		    (?<url>\S*?)                # URL: non-greedy non-whitespace characters
		    (?:
		      [ ]
		      "
		        (?<title>
		          (?:[^"]|(?<=\\)")*?   # Title without double quotes around
		        )
		      "
		    )?
		  \)
		)
		/x
		)    # match [<title>](<url> "<description>") OLD was [<title>](<url>) <description>
        {
            my $title       = $1;
            my $url         = $2;
            my $description = $3;
        
			$title =~ s/^\[|\]$//g; # strip [...]
		
            $print_bookmark->($title, $url, $description);
        }
    }
}


############
# RUN LOOP #
############

foreach my $file (@ARGV)
{
    my $name = basename($file);
    
    if    (-f $file and $name =~ /\.plist$/)   { _safari($file);    }
    elsif (-f $file and $name =~ /\.sqlite$/)  { _firefox($file);   }
    elsif (-f $file and $name =~ /Bookmarks$/) { _chrome($file);    }
    elsif (-d $file and $name =~ /Favorites$/) { _iexplorer($file); }
    elsif (-f $file and $name =~ /\.md$/)      { _md($file);        }
    elsif (-f $file and $name =~ /\.gmi$/)     { _gemini($file);    }
    elsif (-f $file )                          { _txt($file);       }
    else  { die "unable to process file : $file"; }
}

exit 1;

__END__

=head1 NAME

bookmarks - Export browser bookmarks as plain text.

=head1 SYNOPSIS

    $ bookmarks [-hVda] [-f format] [file ...]

    -h, --help      help
    -V, --version   version
    -d              debug
    -a              all files : process arguments and default locations
    -f format       any combination of letters t,u,d as title/url/description
                    (default : tud)
    -s              find schemeless URLs in text files (default : no)

=head1 DESCRIPTION

bookmarks is a tool to export bookmarks from files supplied as arguments, or
from browsers default locations (when called without arguments). The following
sources are supported :

=over

=item - Safari (.plist)

=item - Firefox (.sqlite)

=item - Chrome and Edge (Bookmarks)

=item - Internet Explorer (Favorites)

=item - Markdown (.md)

=item - Gemini (.gmi)

=item - Surfraw (same as plain text)

=item - Plain text (any other extension)

=back

Default export format : <title> <url> <description>

=over
 
=item - <title> is your bookmark's name, alias, or webpage title.

=item - <url> is your bookmark's address, URL or URI.

=item - <description> is empty for Chrome, Edge, Internet Explorer or Gemini. It 
        contains Safari 'Description', Firefox 'Tags' and what the Markdown spec
        calls the 'Title' (just the tooltip, actually).

=back

Markdown, Gemini and plain text files are processed line by line (as UTF-8) :

    [markdown example](http://example.md/ "with description")
    => gemini://example.gmi gemini example
    plain text example http://example.txt with description

=head1 BUGS

Please report any bugs or feature requests to C<kaldor@cpan.org>, or through
the web interface at L<https://github.com/kal247/App-uricolor/issues>.

=head1 AUTHOR

jul, C<kaldor@cpan.org>

=head1 LICENSE AND COPYRIGHT

This software is Copyright (c) 2019-2021 by jul.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)