Author image Michal Josef Špaček

NAME

 Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".

SYNOPSIS

 use Tag::Reader::Perl;

 my $obj = Tag::Reader::Perl->new;
 my @tokens = $obj->gettoken;
 $obj->set_file($file, $force);
 $obj->set_text($text, $force);

METHODS

new()

 my $obj = Tag::Reader::Perl->new;

Constructor.

Returns instance of object.

gettoken

 my @tokens = $obj->gettoken;

Get parsed token.

Returns a structure describing the parsed token in list context (see TOKEN STRUCTURE), e.g. <xml> → ('<xml>', 'xml', 1, 1)

Returns the parsed token in scalar context, e.g. <xml> → '<xml>'

set_file

 $obj->set_file($file, $force);

Set the file to parse. If $force is present, any previously set text or file is replaced by the new file.

Returns undef.

set_text

 $obj->set_text($text, $force);

Set the text to parse. If $force is present, any previously set text or file is replaced by the new text.

Returns undef.

TOKEN STRUCTURE

 Structure contains 4 fields in array:
 - parsed data
 - tag type
 - number of line
 - number of column in line

 Tag types are:
 - '[\w:]+' - element name.
 - '/[\w:]+' - end of element name.
 - '!data' - data
 - '![cdata[' - cdata
 - '!--' - comment
 - '?\w+' - instruction
 - '![\w+' - conditional
 - '!attlist' - DTD attlist
 - '!element' - DTD element
 - '!entity' - DTD entity
 - '!notation' - DTD notation

ERRORS

 new():
         From Class::Utils::set_params():
                 Unknown parameter '%s'.

 set_text():
         Bad tag.
         Bad text.
         Cannot set new data if exists data.

 set_file():
         Bad tag.
         Bad file.
         Cannot set new data if exists data.
         Cannot open file '%s'.

EXAMPLE1

 use strict;
 use warnings;

 use Tag::Reader::Perl;
 use Unicode::UTF8 qw(decode_utf8 encode_utf8);

 # Object.
 my $obj = Tag::Reader::Perl->new;

 # Example data.
 my $sgml = <<'END';
 <DOKUMENT> 
   <adresa stát="cs">
     <město>
     <ulice>Nová</ulice>
     <číslo>5</číslo>
   </adresa>
 </DOKUMENT>
 END

 # Set data to object.
 $obj->set_text(decode_utf8($sgml));

 # Tokenize.
 while (my @tag = $obj->gettoken) {
         print "[\n";
         print "\t[0]: '".encode_utf8($tag[0])."'\n";
         print "\t[1]: ".encode_utf8($tag[1])."\n";
         print "\t[2]: $tag[2]\n";
         print "\t[3]: $tag[3]\n";
         print "]\n";
 }

 # Output:
 # [
 #      [0]: '<DOKUMENT>'
 #      [1]: dokument
 #      [2]: 1
 #      [3]: 1
 # ]
 # [
 #      [0]: ' 
 #   '
 #      [1]: !data
 #      [2]: 1
 #      [3]: 11
 # ]
 # [
 #      [0]: '<adresa stát="cs">'
 #      [1]: adresa
 #      [2]: 2
 #      [3]: 3
 # ]
 # [
 #      [0]: '
 #     '
 #      [1]: !data
 #      [2]: 2
 #      [3]: 21
 # ]
 # [
 #      [0]: '<město>'
 #      [1]: město
 #      [2]: 3
 #      [3]: 5
 # ]
 # [
 #      [0]: '
 #     '
 #      [1]: !data
 #      [2]: 3
 #      [3]: 12
 # ]
 # [
 #      [0]: '<ulice>'
 #      [1]: ulice
 #      [2]: 4
 #      [3]: 5
 # ]
 # [
 #      [0]: 'Nová'
 #      [1]: !data
 #      [2]: 4
 #      [3]: 12
 # ]
 # [
 #      [0]: '</ulice>'
 #      [1]: /ulice
 #      [2]: 4
 #      [3]: 16
 # ]
 # [
 #      [0]: '
 #     '
 #      [1]: !data
 #      [2]: 4
 #      [3]: 24
 # ]
 # [
 #      [0]: '<číslo>'
 #      [1]: číslo
 #      [2]: 5
 #      [3]: 5
 # ]
 # [
 #      [0]: '5'
 #      [1]: !data
 #      [2]: 5
 #      [3]: 12
 # ]
 # [
 #      [0]: '</číslo>'
 #      [1]: /číslo
 #      [2]: 5
 #      [3]: 13
 # ]
 # [
 #      [0]: '
 #   '
 #      [1]: !data
 #      [2]: 5
 #      [3]: 21
 # ]
 # [
 #      [0]: '</adresa>'
 #      [1]: /adresa
 #      [2]: 6
 #      [3]: 3
 # ]
 # [
 #      [0]: '
 # '
 #      [1]: !data
 #      [2]: 6
 #      [3]: 12
 # ]
 # [
 #      [0]: '</DOKUMENT>'
 #      [1]: /dokument
 #      [2]: 7
 #      [3]: 1
 # ]
 # [
 #      [0]: '
 # '
 #      [1]: !data
 #      [2]: 7
 #      [3]: 12
 # ]

DEPENDENCIES

Class::Utils, Error::Pure, Readonly.

SEE ALSO

Tag::Reader

Parse SGML/HTML/XML by each "tag".

HTML::TagReader

Perl extension module for reading html/sgml/xml files by tags.

REPOSITORY

https://github.com/michal-josef-spacek/Tag-Reader-Perl

AUTHOR

Michal Josef Špaček mailto:skim@cpan.org

http://skim.cz

LICENSE AND COPYRIGHT

© Michal Josef Špaček 2005-2021

BSD 2-Clause License

VERSION

0.02