use strict;
use warnings;
use HTTP::Tiny;
use Mojo::DOM58;
use Encode 'decode';

# Extract named character references from the HTML Living Standard.
#
# Downloads the specification page, locates the rows of the element with id
# "named-character-references-table", and prints one line per row to STDOUT in
# the form "<entity> <codepoints>". Dies with a descriptive message if the
# download fails or the page does not have the expected structure.
my $url = 'https://html.spec.whatwg.org';
my $res = HTTP::Tiny->new->get($url);

# HTTP::Tiny reports failures through the response hash instead of throwing,
# so the result must be checked explicitly. Status 599 marks an internal
# error (e.g. connection or TLS failure) whose message is in the content.
unless ($res->{success}) {
  my $detail = $res->{status} == 599 ? ": $res->{content}" : '';
  die "Failed to fetch $url: $res->{status} $res->{reason}$detail\n";
}

my $dom  = Mojo::DOM58->new(decode 'UTF-8', $res->{content});
my $rows = $dom->find('#named-character-references-table tbody > tr');

# No matching rows means the selector no longer fits the page; fail loudly
# rather than exiting successfully with empty output.
die "No named character reference rows found at $url\n"
  unless $rows->size;

my $row_number = 0;
for my $row ($rows->each) {
  $row_number++;

  # First <code> directly inside a cell holds the entity name; the second
  # cell of the row holds the code point(s).
  my $entity_node     = $row->at('td > code');
  my $codepoints_node = $row->children('td')->[1];
  die "Row $row_number of the named character references table "
    . "has an unexpected structure\n"
    unless defined $entity_node && defined $codepoints_node;

  my $entity     = $entity_node->text;
  my $codepoints = $codepoints_node->text;

  # Trim surrounding whitespace only; whitespace between multiple code
  # points is left untouched.
  $codepoints =~ s/\A\s+|\s+\z//g;

  print "$entity $codepoints\n";
}

1;