package Statistics::Covid::DataProvider::World::JHU;
# John Hopkins University
use 5.006;
use strict;
use warnings;
our $VERSION = '0.23';
use parent 'Statistics::Covid::DataProvider::Base';
use DateTime;
use File::Spec;
use File::Path;
use Data::Dump qw/pp/;
use Statistics::Covid::Utils;
# new method inherited but here we will create one
# to be used as a factory
sub new {
my ($class, $params) = @_;
$params = {} unless defined $params;
$params->{'urls'} = [
[ # start a url
# check the resultRecordCount=10000 and where=TotalCases%20%3E%3D%200
# modified for where=TotalCases%20%3D%3E%200 (that is >=0) and resultRecordCount=10000
'https://services9.arcgis.com/N9p5hsImWXAccRNI/arcgis/rest/services/Z7biAeD8PAkqgmWhxG2A/FeatureServer/1/query?f=json&where=Confirmed%20%3E%3D%200&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Confirmed%20desc%2CCountry_Region%20asc%2CProvince_State%20asc&resultOffset=0&resultRecordCount=250&cacheHint=true',
#'https://services9.arcgis.com/N9p5hsImWXAccRNI/arcgis/rest/services/Z7biAeD8PAkqgmWhxG2A/FeatureServer/1/query?cacheHint=true&f=json&orderByFields=Confirmed+desc%2CCountry_Region+asc%2CProvince_State+asc&outFields=*&resultOffset=0&resultRecordCount=250&returnGeometry=false&spatialRel=esriSpatialRelIntersects&where=Confirmed+%3E+0',
# the headers associated with that url
[
'Cache-Control' => 'max-age=0',
'Connection' => 'keep-alive',
'Accept' => '*/*',
'Accept-Encoding' => 'gzip, x-gzip, deflate, x-bzip2, bzip2',
'Accept-Language' => 'en-US,en;q=0.5',
'Host' => 'services9.arcgis.com:443',
# likes this: 'Mon, 16 Mar 2020 21:14:13 GMT',
'If-Modified-Since' => DateTime->now(time_zone=>'GMT')->add(minutes=>-1)->strftime('%a, %d %b %Y %H:%M:%S %Z'),
'If-None-Match' => 'sd8_-224912290',
'Referer' => 'https://services9.arcgis.com/N9p5hsImWXAccRNI/arcgis/rest/services/Z7biAeD8PAkqgmWhxG2A/FeatureServer/1/query?f=json&where=Confirmed%20%3E%200&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Confirmed%20desc%2CCountry_Region%20asc%2CProvince_State%20asc&resultOffset=0&resultRecordCount=250&cacheHint=true',
'TE' => 'Trailers',
# we have our own default
#'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.20; rv:61.0) Gecko/20100101 Firefox/73.0',
'DNT' => '1',
'Origin' => 'https://www.arcgis.com',
] # end headers
] # end a url
]; # end 'urls'
# initialise our parent class
my $self = $class->SUPER::new($params);
if( ! defined $self ){ warn "error, call to $class->new() has failed."; return undef }
# and do set parameters specific to this particular data provider
$self->name('JHU'); # <<<< Make sure this is unique over all providers
$self->datafilesdir(File::Spec->catfile(
$self->datafilesdir(), # use this as prefix it was set in config
'World', $self->name() # and append a dir hierarchy relevant to this provider
));
# initialise this particular data provider
if( ! $self->init() ){ warn "error, call to init() has failed."; return undef }
# this will now be JHU obj (not generic)
return $self
}
# overwriting this from parent
# returns undef on failure or a data id unique on timepoint
# which can be used for saving data to a file or labelling this data
sub create_data_id {
my $self = $_[0];
my $datas = $_[1]; # this is an arrayref of [url, data_received_string, data_as_perlvar]
# get the date from the first pv
# this json is idiotic because it's just arrays,
# 0: location id
# 1: location name
# 2: cases
# 3: population
# unless [0] is 'UpdatedOn', in which case [1] is 09:00 GMT, 15 March
# thankfully this update info is last
my $date = undef;
my $aurl = $datas->[0]->[0];
my $apv = $datas->[0]->[2];
# note this is in milliseconds epoch, but parser will take care
# also note that this is about countries and each country has its own last-update
# some countries (only china?) have province data too
# so, for the time being find the maximum epoch which is the latest data at least one country was updated
# epoch and index in the array
my $latest = [$apv->{'features'}->[0]->{'attributes'}->{'Last_Update'}, 0];
my $epoch_date_str;
for(my $i=scalar(@{$apv->{'features'}});$i-->1;){
# note that this is millis epoch
$epoch_date_str = $apv->{'features'}->[$i]->{'attributes'}->{'Last_Update'} + 0;
if( $epoch_date_str > $latest->[0] ){ $latest = [$epoch_date_str, $i] }
}
$epoch_date_str = $apv->{'features'}->[$latest->[1]]->{'attributes'}->{'Last_Update'};
if( ! defined($date=Statistics::Covid::Utils::epoch_milliseconds_to_DateTime($epoch_date_str)) ){
warn "error, failed to parse date '$epoch_date_str' from input json data just transfered from url '$aurl'.";
return undef;
}
my $dataid = $date->strftime('2020-%m-%dT%H.%M.%S')
. '_'
. $date->epoch()
;
print "create_data_id() : using last updated time of '".$apv->{'features'}->[$latest->[1]]->{'attributes'}->{'Country_Region'}."', last updated on: ".$date->iso8601()."\n";
return $dataid
}
# returns the data read if successful or undef if failed
sub load_fetched_data_from_localfile {
my $self = $_[0];
my $inbasename = $_[1];
my $infile = $inbasename . '.data.json';
my $infh;
if( ! open($infh, '<:encoding(UTF-8)', $infile) ){ warn "error, failed to open file '$infile' for reading, $!"; return undef }
my $json_contents; {local $/=undef; $json_contents = <$infh> } close $infh;
my $pv = Statistics::Covid::Utils::json2perl($json_contents);
if( ! defined $pv ){ warn "error, call to ".'Statistics::Covid::Utils::json2perl()'." has failed (for data, file '$infile')."; return undef }
return [['file://'.$infile, $json_contents, $pv]];
}
sub create_Datums_from_fetched_data {
my $self = $_[0];
my $datas = $_[1]; # the fetched data as an arrayref with 1 element which is an array of [url, data_received_string, data_as_perlvar]
my $data = $datas->[0]->[2]->{'features'}; # getting to the array of locations
# data is an array of
# {
# attributes => {
# Active => 6285,
# Admin2 => undef,
# Combined_Key => 'fix',
# Confirmed => 67800,
# Country_Region => "China",
# Deaths => 3133,
# FIPS => 'fix',
# Last_Update => 1584690182000,
# Lat => 30.9756403482891,
# Long_ => 112.270692167452,
# OBJECTID => 106,
# Province_State => "Hubei",
# Recovered => 58382,
# },
# and for countries only data
# {
# attributes => {
# Active => 91,
# Admin2 => 'fix',
# Combined_Key => 'fix',
# Confirmed => 95,
# Country_Region => "Cyprus",
# Deaths => 1,
# FIPS => 'fix',
# Last_Update => 1584895387000,
# Lat => 35.1264,
# Long_ => 33.4299,
# OBJECTID => 7,
# Province_State => 'fix',
# Recovered => 3,
# },
my $ds = $self->name();
my ($name, $belongsto, $datetimeobj);
my @ret = ();
for my $aWorldLocation (@$data){
$aWorldLocation = $aWorldLocation->{'attributes'};
if( ! exists $aWorldLocation->{'Province_State'}
or ! defined $aWorldLocation->{'Province_State'}
or $aWorldLocation->{'Province_State'} eq 'fix'
){
$name = $aWorldLocation->{'Country_Region'};
$belongsto = 'World'; # default for countries!
} else {
$name = $aWorldLocation->{'Province_State'};
$belongsto = $aWorldLocation->{'Country_Region'};
}
$datetimeobj = Statistics::Covid::Utils::epoch_milliseconds_to_DateTime($aWorldLocation->{'Last_Update'});
if( ! defined $datetimeobj ){ warn pp($aWorldLocation)."\n\nerror, call to ".'Statistics::Covid::Utils::epoch_milliseconds_to_DateTime()'." has failed for date field of 'Last_Update' in the above parameters (it must be milliseconds since unix epoch. A filename (or a url) may be associated with it at\n ".$datas->[0]->[0]."\n"; return undef }
my $datumobj = Statistics::Covid::Datum->new({
'id' => join('/',
$aWorldLocation->{'Country_Region'}, $aWorldLocation->{'Lat'}, $aWorldLocation->{'Long_'}),
'name' => $name,
'belongsto' => $belongsto,
'confirmed' => $aWorldLocation->{'Confirmed'},
'recovered' => $aWorldLocation->{'Recovered'},
'terminal' => $aWorldLocation->{'Deaths'},
# what is 'Active'?
'date' => $datetimeobj,
'type' => 'Country or Region',
'datasource' => $ds,
});
if( ! defined $datumobj ){ warn "error, call to ".'Statistics::Covid::Datum->new()'." has failed for this data: ".join(",", @$aWorldLocation); return undef }
push @ret, $datumobj
}
return \@ret
}
# saves data received as JSON and PL (perl variables)
# into files specified by an optional basename (input param: $outbase)
# OR if no outbase is specified, it creates one
# as a timestamped id and the dir will be the datafielesdir()
# as it was specified in its config during construction
# '$datas' is an arrayref of
# [ [url, data_received_string, data_as_perlvar] ]
# this provider does not have any metadata, all data is received in 1 chunk
# returns undef on failure or the basename if successful
sub save_fetched_data_to_localfile {
my $self = $_[0];
my $datas = $_[1]; # this is an arrayref of [url, data_received_string, data_as_perlvar]
my $outbase = $_[2]; # optional outbase
if( ! defined $outbase ){
my $dataid = $self->create_data_id($datas);
if( ! defined $dataid ){
warn "error, call to ".'create_data_id()'." has failed.";
return undef;
}
$outbase = File::Spec->catfile($self->datafilesdir(), $dataid);
}
my $index = 0;
my $outfile = $outbase . '.data.json';
my $aurl = $datas->[$index]->[0];
if( ! Statistics::Covid::Utils::save_text_to_localfile($datas->[$index]->[1], $outfile) ){ warn "error, call to ".'save_text_to_localfile()'." has failed for url '$aurl'."; return undef }
$outfile = $outbase . '.data.pl';
if( ! Statistics::Covid::Utils::save_perl_var_to_localfile($datas->[$index]->[2], $outfile) ){ warn "error, call to ".'save_perl_var_to_localfile()'." has failed for url '$aurl'."; return undef }
print "save_fetched_data_to_localfile() : saved data to base '$outbase'.\n";
return $outbase;
}
1;
__END__
# end program, below is the POD