####################################################################
#
# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
#
#####################################################################
# Index...
# CODE SECTION 1: Preliminary code, global definitions and sub new
# sub new
# CODE SECTION 2: Some Basic Utilities
# CODE SECTION 3: Check and process options
# sub check_options
# CODE SECTION 4: Receive lines from the tokenizer
# sub write_line
# CODE SECTION 5: Pre-process the entire file
# sub finish_formatting
# CODE SECTION 6: Process line-by-line
# sub process_all_lines
# CODE SECTION 7: Process lines of code
# process_line_of_CODE
# CODE SECTION 8: Utilities for setting breakpoints
# sub set_forced_breakpoint
# CODE SECTION 9: Process batches of code
# sub grind_batch_of_CODE
# CODE SECTION 10: Code to break long statements
# sub break_long_lines
# CODE SECTION 11: Code to break long lists
# sub break_lists
# CODE SECTION 12: Code for setting indentation
# CODE SECTION 13: Preparing batch of lines for vertical alignment
# sub convey_batch_to_vertical_aligner
# CODE SECTION 14: Code for creating closing side comments
# sub add_closing_side_comment
# CODE SECTION 15: Summarize
# sub wrapup
#######################################################################
# CODE SECTION 1: Preliminary code and global definitions up to sub new
#######################################################################
package Perl::Tidy::Formatter;
use strict;
use warnings;
# DEVEL_MODE gets switched on during automated testing for extra checking
use constant DEVEL_MODE => 0;
use constant EMPTY_STRING => q{};
use constant SPACE => q{ };
{ #<<< A non-indenting brace to contain all lexical variables
use Carp;
use English qw( -no_match_vars );
use List::Util qw( min max first ); # min, max first are in Perl 5.8
our $VERSION = '20230912';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
sub AUTOLOAD {
# Catch any undefined sub calls so that we are sure to get
# some diagnostic information. This sub should never be called
# except for a programming error.
our $AUTOLOAD;
return if ( $AUTOLOAD =~ /\bDESTROY$/ );
my ( $pkg, $fname, $lno ) = caller();
my $my_package = __PACKAGE__;
print {*STDERR} <<EOM;
======================================================================
Error detected in package '$my_package', version $VERSION
Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
Called from package: '$pkg'
Called from File '$fname' at line '$lno'
This error is probably due to a recent programming change
======================================================================
EOM
exit 1;
} ## end sub AUTOLOAD
sub DESTROY {
my $self = shift;
$self->_decrement_count();
return;
}
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
croak "unexpected return from Perl::Tidy::Die";
}
sub Warn {
my ($msg) = @_;
Perl::Tidy::Warn($msg);
return;
}
sub Fault {
my ($msg) = @_;
# This routine is called for errors that really should not occur
# except if there has been a bug introduced by a recent program change.
# Please add comments at calls to Fault to explain why the call
# should not occur, and where to look to fix it.
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
my $pkg = __PACKAGE__;
my $input_stream_name = get_input_stream_name();
Die(<<EOM);
==============================================================================
While operating on input stream with name: '$input_stream_name'
A fault was detected at line $line0 of sub '$subroutine1'
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
This is probably an error introduced by a recent programming change.
$pkg reports VERSION='$VERSION'.
==============================================================================
EOM
# We shouldn't get here, but this return is to keep Perl-Critic from
# complaining.
return;
} ## end sub Fault
sub Fault_Warn {
my ($msg) = @_;
# This is the same as Fault except that it calls Warn instead of Die
# and returns.
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
my $input_stream_name = get_input_stream_name();
Warn(<<EOM);
==============================================================================
While operating on input stream with name: '$input_stream_name'
A fault was detected at line $line0 of sub '$subroutine1'
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
This is probably an error introduced by a recent programming change.
Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
==============================================================================
EOM
return;
} ## end sub Fault_Warn
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
croak "unexpected return from Perl::Tidy::Exit";
}
# Global variables ...
my (
#-----------------------------------------------------------------
# Section 1: Global variables which are either always constant or
# are constant after being configured by user-supplied
# parameters. They remain constant as a file is being processed.
# The INITIALIZER comment tells the sub responsible for initializing
# each variable. Failure to initialize or re-initialize a global
# variable can cause bugs which are hard to locate.
#-----------------------------------------------------------------
# INITIALIZER: sub check_options
$rOpts,
# short-cut option variables
# INITIALIZER: sub initialize_global_option_vars
$rOpts_add_newlines,
$rOpts_add_whitespace,
$rOpts_add_trailing_commas,
$rOpts_blank_lines_after_opening_block,
$rOpts_block_brace_tightness,
$rOpts_block_brace_vertical_tightness,
$rOpts_brace_follower_vertical_tightness,
$rOpts_break_after_labels,
$rOpts_break_at_old_attribute_breakpoints,
$rOpts_break_at_old_comma_breakpoints,
$rOpts_break_at_old_keyword_breakpoints,
$rOpts_break_at_old_logical_breakpoints,
$rOpts_break_at_old_semicolon_breakpoints,
$rOpts_break_at_old_ternary_breakpoints,
$rOpts_break_open_compact_parens,
$rOpts_closing_side_comments,
$rOpts_closing_side_comment_else_flag,
$rOpts_closing_side_comment_maximum_text,
$rOpts_comma_arrow_breakpoints,
$rOpts_continuation_indentation,
$rOpts_cuddled_paren_brace,
$rOpts_delete_closing_side_comments,
$rOpts_delete_old_whitespace,
$rOpts_delete_side_comments,
$rOpts_delete_trailing_commas,
$rOpts_delete_weld_interfering_commas,
$rOpts_extended_continuation_indentation,
$rOpts_format_skipping,
$rOpts_freeze_whitespace,
$rOpts_function_paren_vertical_alignment,
$rOpts_fuzzy_line_length,
$rOpts_ignore_old_breakpoints,
$rOpts_ignore_side_comment_lengths,
$rOpts_ignore_perlcritic_comments,
$rOpts_indent_closing_brace,
$rOpts_indent_columns,
$rOpts_indent_only,
$rOpts_keep_interior_semicolons,
$rOpts_line_up_parentheses,
$rOpts_logical_padding,
$rOpts_maximum_consecutive_blank_lines,
$rOpts_maximum_fields_per_table,
$rOpts_maximum_line_length,
$rOpts_one_line_block_semicolons,
$rOpts_opening_brace_always_on_right,
$rOpts_outdent_keywords,
$rOpts_outdent_labels,
$rOpts_outdent_long_comments,
$rOpts_outdent_long_quotes,
$rOpts_outdent_static_block_comments,
$rOpts_recombine,
$rOpts_short_concatenation_item_length,
$rOpts_space_prototype_paren,
$rOpts_stack_closing_block_brace,
$rOpts_static_block_comments,
$rOpts_add_missing_else,
$rOpts_warn_missing_else,
$rOpts_tee_block_comments,
$rOpts_tee_pod,
$rOpts_tee_side_comments,
$rOpts_variable_maximum_line_length,
$rOpts_valign_code,
$rOpts_valign_side_comments,
$rOpts_valign_if_unless,
$rOpts_whitespace_cycle,
$rOpts_extended_block_tightness,
$rOpts_extended_line_up_parentheses,
# Static hashes
# INITIALIZER: BEGIN block
%is_assignment,
%is_non_list_type,
%is_if_unless_and_or_last_next_redo_return,
%is_if_elsif_else_unless_while_until_for_foreach,
%is_if_unless_while_until_for_foreach,
%is_last_next_redo_return,
%is_if_unless,
%is_if_elsif,
%is_if_unless_elsif,
%is_if_unless_elsif_else,
%is_elsif_else,
%is_and_or,
%is_chain_operator,
%is_block_without_semicolon,
%ok_to_add_semicolon_for_block_type,
%is_opening_type,
%is_closing_type,
%is_opening_token,
%is_closing_token,
%is_ternary,
%is_equal_or_fat_comma,
%is_counted_type,
%is_opening_sequence_token,
%is_closing_sequence_token,
%matching_token,
%is_container_label_type,
%is_die_confess_croak_warn,
%is_my_our_local,
%is_soft_keep_break_type,
%is_indirect_object_taker,
@all_operators,
%is_do_follower,
%is_anon_sub_brace_follower,
%is_anon_sub_1_brace_follower,
%is_other_brace_follower,
# INITIALIZER: sub check_options
$controlled_comma_style,
%keep_break_before_type,
%keep_break_after_type,
%outdent_keyword,
%keyword_paren_inner_tightness,
%container_indentation_options,
%tightness,
%line_up_parentheses_control_hash,
$line_up_parentheses_control_is_lxpl,
# These can be modified by grep-alias-list
# INITIALIZER: sub initialize_grep_and_friends
%is_sort_map_grep,
%is_sort_map_grep_eval,
%is_sort_map_grep_eval_do,
%is_block_with_ci,
%is_keyword_returning_list,
%block_type_map, # initialized in BEGIN, but may be changed
%want_one_line_block, # may be changed in prepare_cuddled_block_types
# INITIALIZER: sub prepare_cuddled_block_types
$rcuddled_block_types,
# INITIALIZER: sub initialize_whitespace_hashes
%binary_ws_rules,
%want_left_space,
%want_right_space,
# INITIALIZER: sub initialize_bond_strength_hashes
%right_bond_strength,
%left_bond_strength,
# INITIALIZER: sub initialize_token_break_preferences
%want_break_before,
%break_before_container_types,
# INITIALIZER: sub initialize_space_after_keyword
%space_after_keyword,
# INITIALIZER: sub initialize_extended_block_tightness_list
%extended_block_tightness_list,
# INITIALIZED BY initialize_global_option_vars
%opening_vertical_tightness,
%closing_vertical_tightness,
%closing_token_indentation,
$some_closing_token_indentation,
%opening_token_right,
%stack_opening_token,
%stack_closing_token,
# INITIALIZER: sub initialize_weld_nested_exclusion_rules
%weld_nested_exclusion_rules,
# INITIALIZER: sub initialize_weld_fat_comma_rules
%weld_fat_comma_rules,
# INITIALIZER: sub initialize_trailing_comma_rules
%trailing_comma_rules,
# regex patterns for text identification.
# Most can be configured by user parameters.
# Most are initialized in a sub make_**_pattern during configuration.
# INITIALIZER: sub make_sub_matching_pattern
$SUB_PATTERN,
$ASUB_PATTERN,
%matches_ASUB,
# INITIALIZER: make_static_block_comment_pattern
$static_block_comment_pattern,
# INITIALIZER: sub make_static_side_comment_pattern
$static_side_comment_pattern,
# INITIALIZER: make_format_skipping_pattern
$format_skipping_pattern_begin,
$format_skipping_pattern_end,
# INITIALIZER: sub make_non_indenting_brace_pattern
$non_indenting_brace_pattern,
# INITIALIZER: sub make_bl_pattern
$bl_exclusion_pattern,
# INITIALIZER: make_bl_pattern
$bl_pattern,
# INITIALIZER: sub make_bli_pattern
$bli_exclusion_pattern,
# INITIALIZER: sub make_bli_pattern
$bli_pattern,
# INITIALIZER: sub make_block_brace_vertical_tightness_pattern
$block_brace_vertical_tightness_pattern,
# INITIALIZER: sub make_blank_line_pattern
$blank_lines_after_opening_block_pattern,
$blank_lines_before_closing_block_pattern,
# INITIALIZER: sub make_keyword_group_list_pattern
$keyword_group_list_pattern,
$keyword_group_list_comment_pattern,
# INITIALIZER: sub make_closing_side_comment_prefix
$closing_side_comment_prefix_pattern,
# INITIALIZER: sub make_closing_side_comment_list_pattern
$closing_side_comment_list_pattern,
# Table to efficiently find indentation and max line length
# from level.
# INITIALIZER: sub initialize_line_length_vars
@maximum_line_length_at_level,
@maximum_text_length_at_level,
$stress_level_alpha,
$stress_level_beta,
$high_stress_level,
# Total number of sequence items in a weld, for quick checks
# INITIALIZER: weld_containers
$total_weld_count,
#--------------------------------------------------------
# Section 2: Work arrays for the current batch of tokens.
#--------------------------------------------------------
# These are re-initialized for each batch of code
# INITIALIZER: sub initialize_batch_variables
$max_index_to_go,
@block_type_to_go,
@type_sequence_to_go,
@forced_breakpoint_to_go,
@token_lengths_to_go,
@summed_lengths_to_go,
@levels_to_go,
@leading_spaces_to_go,
@reduced_spaces_to_go,
@mate_index_to_go,
@ci_levels_to_go,
@nesting_depth_to_go,
@nobreak_to_go,
@old_breakpoint_to_go,
@tokens_to_go,
@K_to_go,
@types_to_go,
@inext_to_go,
@parent_seqno_to_go,
# forced breakpoint variables associated with each batch of code
$forced_breakpoint_count,
$forced_breakpoint_undo_count,
$index_max_forced_break,
);
BEGIN {
# Index names for token variables.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_CI_LEVEL_ => $i++,
_CUMULATIVE_LENGTH_ => $i++,
_LINE_INDEX_ => $i++,
_KNEXT_SEQ_ITEM_ => $i++,
_LEVEL_ => $i++,
_TOKEN_ => $i++,
_TOKEN_LENGTH_ => $i++,
_TYPE_ => $i++,
_TYPE_SEQUENCE_ => $i++,
# Number of token variables; must be last in list:
_NVARS => $i++,
};
} ## end BEGIN
BEGIN {
# Index names for $self variables.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_rlines_ => $i++,
_rLL_ => $i++,
_Klimit_ => $i++,
_rdepth_of_opening_seqno_ => $i++,
_rSS_ => $i++,
_Iss_opening_ => $i++,
_Iss_closing_ => $i++,
_rblock_type_of_seqno_ => $i++,
_ris_asub_block_ => $i++,
_ris_sub_block_ => $i++,
_K_opening_container_ => $i++,
_K_closing_container_ => $i++,
_K_opening_ternary_ => $i++,
_K_closing_ternary_ => $i++,
_K_first_seq_item_ => $i++,
_rtype_count_by_seqno_ => $i++,
_ris_function_call_paren_ => $i++,
_rlec_count_by_seqno_ => $i++,
_ris_broken_container_ => $i++,
_ris_permanently_broken_ => $i++,
_rblank_and_comment_count_ => $i++,
_rhas_list_ => $i++,
_rhas_broken_list_ => $i++,
_rhas_broken_list_with_lec_ => $i++,
_rfirst_comma_line_index_ => $i++,
_rhas_code_block_ => $i++,
_rhas_broken_code_block_ => $i++,
_rhas_ternary_ => $i++,
_ris_excluded_lp_container_ => $i++,
_rlp_object_by_seqno_ => $i++,
_rwant_reduced_ci_ => $i++,
_rno_xci_by_seqno_ => $i++,
_rbrace_left_ => $i++,
_ris_bli_container_ => $i++,
_rparent_of_seqno_ => $i++,
_rchildren_of_seqno_ => $i++,
_ris_list_by_seqno_ => $i++,
_ris_cuddled_closing_brace_ => $i++,
_rbreak_container_ => $i++,
_rshort_nested_ => $i++,
_length_function_ => $i++,
_is_encoded_data_ => $i++,
_fh_tee_ => $i++,
_sink_object_ => $i++,
_file_writer_object_ => $i++,
_vertical_aligner_object_ => $i++,
_logger_object_ => $i++,
_radjusted_levels_ => $i++,
_this_batch_ => $i++,
_ris_special_identifier_token_ => $i++,
_last_output_short_opening_token_ => $i++,
_last_line_leading_type_ => $i++,
_last_line_leading_level_ => $i++,
_added_semicolon_count_ => $i++,
_first_added_semicolon_at_ => $i++,
_last_added_semicolon_at_ => $i++,
_deleted_semicolon_count_ => $i++,
_first_deleted_semicolon_at_ => $i++,
_last_deleted_semicolon_at_ => $i++,
_embedded_tab_count_ => $i++,
_first_embedded_tab_at_ => $i++,
_last_embedded_tab_at_ => $i++,
_first_tabbing_disagreement_ => $i++,
_last_tabbing_disagreement_ => $i++,
_tabbing_disagreement_count_ => $i++,
_in_tabbing_disagreement_ => $i++,
_first_brace_tabbing_disagreement_ => $i++,
_in_brace_tabbing_disagreement_ => $i++,
_saw_VERSION_in_this_file_ => $i++,
_saw_END_or_DATA_ => $i++,
_rK_weld_left_ => $i++,
_rK_weld_right_ => $i++,
_rweld_len_right_at_K_ => $i++,
_rspecial_side_comment_type_ => $i++,
_rseqno_controlling_my_ci_ => $i++,
_ris_seqno_controlling_ci_ => $i++,
_save_logfile_ => $i++,
_maximum_level_ => $i++,
_maximum_level_at_line_ => $i++,
_maximum_BLOCK_level_ => $i++,
_maximum_BLOCK_level_at_line_ => $i++,
_rKrange_code_without_comments_ => $i++,
_rbreak_before_Kfirst_ => $i++,
_rbreak_after_Klast_ => $i++,
_converged_ => $i++,
_rstarting_multiline_qw_seqno_by_K_ => $i++,
_rending_multiline_qw_seqno_by_K_ => $i++,
_rKrange_multiline_qw_by_seqno_ => $i++,
_rmultiline_qw_has_extra_level_ => $i++,
_rcollapsed_length_by_seqno_ => $i++,
_rbreak_before_container_by_seqno_ => $i++,
_roverride_cab3_ => $i++,
_ris_assigned_structure_ => $i++,
_ris_short_broken_eval_block_ => $i++,
_ris_bare_trailing_comma_by_seqno_ => $i++,
_rseqno_non_indenting_brace_by_ix_ => $i++,
_rmax_vertical_tightness_ => $i++,
_no_vertical_tightness_flags_ => $i++,
_LAST_SELF_INDEX_ => $i - 1,
};
} ## end BEGIN
BEGIN {
# Index names for batch variables.
# Do not combine with other BEGIN blocks (c101).
# These are stored in _this_batch_, which is a sub-array of $self.
my $i = 0;
use constant {
_starting_in_quote_ => $i++,
_ending_in_quote_ => $i++,
_is_static_block_comment_ => $i++,
_ri_first_ => $i++,
_ri_last_ => $i++,
_do_not_pad_ => $i++,
_peak_batch_size_ => $i++,
_batch_count_ => $i++,
_rix_seqno_controlling_ci_ => $i++,
_batch_CODE_type_ => $i++,
_ri_starting_one_line_block_ => $i++,
_runmatched_opening_indexes_ => $i++,
_lp_object_count_this_batch_ => $i++,
};
} ## end BEGIN
BEGIN {
# Sequence number assigned to the root of sequence tree.
# The minimum of the actual sequences numbers is 4, so we can use 1
use constant SEQ_ROOT => 1;
# Codes for insertion and deletion of blanks
use constant DELETE => 0;
use constant STABLE => 1;
use constant INSERT => 2;
# whitespace codes
use constant WS_YES => 1;
use constant WS_OPTIONAL => 0;
use constant WS_NO => -1;
# Token bond strengths.
use constant NO_BREAK => 10_000;
use constant VERY_STRONG => 100;
use constant STRONG => 2.1;
use constant NOMINAL => 1.1;
use constant WEAK => 0.8;
use constant VERY_WEAK => 0.55;
# values for testing indexes in output array
use constant UNDEFINED_INDEX => -1;
# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
# This is the decimal range of printable characters in ASCII. It is used to
# make quick preliminary checks before resorting to using a regex.
use constant ORD_PRINTABLE_MIN => 33;
use constant ORD_PRINTABLE_MAX => 126;
# Initialize constant hashes ...
my @q;
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
@is_assignment{@q} = (1) x scalar(@q);
# a hash needed by break_lists for efficiency:
push @q, qw{ ; < > ~ f };
@is_non_list_type{@q} = (1) x scalar(@q);
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
# These block types may have text between the keyword and opening
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
@q = qw(if elsif else unless while until for foreach case when catch);
@is_if_elsif_else_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
@q = qw(if unless while until for foreach);
@is_if_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
@q = qw(last next redo return);
@is_last_next_redo_return{@q} = (1) x scalar(@q);
# Map related block names into a common name to allow vertical alignment
# used by sub make_alignment_patterns. Note: this is normally unchanged,
# but it contains 'grep' and can be re-initialized in
# sub initialize_grep_and_friends in a testing mode.
%block_type_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'if',
'default' => 'if',
'case' => 'if',
'sort' => 'map',
'grep' => 'map',
);
@q = qw(if unless);
@is_if_unless{@q} = (1) x scalar(@q);
@q = qw(if elsif);
@is_if_elsif{@q} = (1) x scalar(@q);
@q = qw(if unless elsif);
@is_if_unless_elsif{@q} = (1) x scalar(@q);
@q = qw(if unless elsif else);
@is_if_unless_elsif_else{@q} = (1) x scalar(@q);
@q = qw(elsif else);
@is_elsif_else{@q} = (1) x scalar(@q);
@q = qw(and or err);
@is_and_or{@q} = (1) x scalar(@q);
# Identify certain operators which often occur in chains.
# Note: the minus (-) causes a side effect of padding of the first line in
# something like this (by sub set_logical_padding):
# Checkbutton => 'Transmission checked',
# -variable => \$TRANS
# This usually improves appearance so it seems ok.
@q = qw(&& || and or : ? . + - * /);
@is_chain_operator{@q} = (1) x scalar(@q);
# Operators that the user can request break before or after.
# Note that some are keywords
@all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
. : ? && || and or err xor
);
# We can remove semicolons after blocks preceded by these keywords
@q =
qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless while until for foreach given when default);
@is_block_without_semicolon{@q} = (1) x scalar(@q);
# We will allow semicolons to be added within these block types
# as well as sub and package blocks.
# NOTES:
# 1. Note that these keywords are omitted:
# switch case given when default sort map grep
# 2. It is also ok to add for sub and package blocks and a labeled block
# 3. But not okay for other perltidy types including:
# { } ; G t
# 4. Test files: blktype.t, blktype1.t, semicolon.t
@q =
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach );
@ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
# 'L' is token for opening { at hash key
@q = qw< L { ( [ >;
@is_opening_type{@q} = (1) x scalar(@q);
# 'R' is token for closing } at hash key
@q = qw< R } ) ] >;
@is_closing_type{@q} = (1) x scalar(@q);
@q = qw< { ( [ >;
@is_opening_token{@q} = (1) x scalar(@q);
@q = qw< } ) ] >;
@is_closing_token{@q} = (1) x scalar(@q);
@q = qw( ? : );
@is_ternary{@q} = (1) x scalar(@q);
@q = qw< { ( [ ? >;
@is_opening_sequence_token{@q} = (1) x scalar(@q);
@q = qw< } ) ] : >;
@is_closing_sequence_token{@q} = (1) x scalar(@q);
%matching_token = (
'{' => '}',
'(' => ')',
'[' => ']',
'?' => ':',
'}' => '{',
')' => '(',
']' => '[',
':' => '?',
);
# a hash needed by sub break_lists for labeling containers
@q = qw( k => && || ? : . );
@is_container_label_type{@q} = (1) x scalar(@q);
@q = qw( die confess croak warn );
@is_die_confess_croak_warn{@q} = (1) x scalar(@q);
@q = qw( my our local );
@is_my_our_local{@q} = (1) x scalar(@q);
# Braces -bbht etc must follow these. Note: experimentation with
# including a simple comma shows that it adds little and can lead
# to poor formatting in complex lists.
@q = qw( = => );
@is_equal_or_fat_comma{@q} = (1) x scalar(@q);
@q = qw( => ; h f );
push @q, ',';
@is_counted_type{@q} = (1) x scalar(@q);
# Tokens where --keep-old-break-xxx flags make soft breaks instead
# of hard breaks. See b1433 and b1436.
# NOTE: $type is used as the hash key for now; if other container tokens
# are added it might be necessary to use a token/type mixture.
@q = qw# -> ? : && || + - / * #;
@is_soft_keep_break_type{@q} = (1) x scalar(@q);
# these functions allow an identifier in the indirect object slot
@q = qw( print printf sort exec system say);
@is_indirect_object_taker{@q} = (1) x scalar(@q);
# Define here tokens which may follow the closing brace of a do statement
# on the same line, as in:
# } while ( $something);
my @dof = qw(until while unless if ; : );
push @dof, ',';
@is_do_follower{@dof} = (1) x scalar(@dof);
# what can follow a multi-line anonymous sub definition closing curly:
my @asf = qw# ; : => or and && || ~~ !~~ ) #;
push @asf, ',';
@is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
# what can follow a one-line anonymous sub closing curly:
# one-line anonymous subs also have ']' here...
# see tk3.t and PP.pm
my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
push @asf1, ',';
@is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
# What can follow a closing curly of a block
# which is not an if/elsif/else/do/sort/map/grep/eval/sub
# Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
my @obf = qw# ; : => or and && || ) #;
push @obf, ',';
@is_other_brace_follower{@obf} = (1) x scalar(@obf);
} ## end BEGIN
{ ## begin closure to count instances
# methods to count instances
my $_count = 0;
sub _increment_count { return ++$_count }
sub _decrement_count { return --$_count }
} ## end closure to count instances
sub new {
my ( $class, @args ) = @_;
# we are given an object with a write_line() method to take lines
my %defaults = (
sink_object => undef,
diagnostics_object => undef,
logger_object => undef,
length_function => undef,
is_encoded_data => EMPTY_STRING,
fh_tee => undef,
);
my %args = ( %defaults, @args );
my $length_function = $args{length_function};
my $is_encoded_data = $args{is_encoded_data};
my $fh_tee = $args{fh_tee};
my $logger_object = $args{logger_object};
my $diagnostics_object = $args{diagnostics_object};
# we create another object with a get_line() and peek_ahead() method
my $sink_object = $args{sink_object};
my $file_writer_object =
Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
# initialize closure variables...
set_logger_object($logger_object);
set_diagnostics_object($diagnostics_object);
initialize_lp_vars();
initialize_csc_vars();
initialize_break_lists();
initialize_undo_ci();
initialize_process_line_of_CODE();
initialize_grind_batch_of_CODE();
initialize_get_final_indentation();
initialize_postponed_breakpoint();
initialize_batch_variables();
initialize_write_line();
my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
rOpts => $rOpts,
file_writer_object => $file_writer_object,
logger_object => $logger_object,
diagnostics_object => $diagnostics_object,
);
write_logfile_entry("\nStarting tokenization pass...\n");
if ( $rOpts->{'entab-leading-whitespace'} ) {
write_logfile_entry(
"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
);
}
elsif ( $rOpts->{'tabs'} ) {
write_logfile_entry("Indentation will be with a tab character\n");
}
else {
write_logfile_entry(
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
# Initialize the $self array reference.
# To add an item, first add a constant index in the BEGIN block above.
my $self = [];
# Basic data structures...
$self->[_rlines_] = []; # = ref to array of lines of the file
# 'rLL' = reference to the continuous liner array of all tokens in a file.
# 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
# 'LL' stuck because it is easy to type. The 'rLL' array is updated
# by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
# with '$K' by convention.
$self->[_rLL_] = [];
$self->[_Klimit_] = undef; # = maximum K index for rLL.
# Indexes into the rLL list
$self->[_K_opening_container_] = {};
$self->[_K_closing_container_] = {};
$self->[_K_opening_ternary_] = {};
$self->[_K_closing_ternary_] = {};
$self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
# 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
# numbers with + or - indicating opening or closing. This list represents
# the entire container tree and is invariant under reformatting. It can be
# used to quickly travel through the tree. Indexes in the rSS array begin
# with '$I' by convention. The 'Iss' arrays give the indexes in this list
# of opening and closing sequence numbers.
$self->[_rSS_] = [];
$self->[_Iss_opening_] = [];
$self->[_Iss_closing_] = [];
# Arrays to help traverse the tree
$self->[_rdepth_of_opening_seqno_] = [];
$self->[_rblock_type_of_seqno_] = {};
$self->[_ris_asub_block_] = {};
$self->[_ris_sub_block_] = {};
# Mostly list characteristics and processing flags
$self->[_rtype_count_by_seqno_] = {};
$self->[_ris_function_call_paren_] = {};
$self->[_rlec_count_by_seqno_] = {};
$self->[_ris_broken_container_] = {};
$self->[_ris_permanently_broken_] = {};
$self->[_rblank_and_comment_count_] = {};
$self->[_rhas_list_] = {};
$self->[_rhas_broken_list_] = {};
$self->[_rhas_broken_list_with_lec_] = {};
$self->[_rfirst_comma_line_index_] = {};
$self->[_rhas_code_block_] = {};
$self->[_rhas_broken_code_block_] = {};
$self->[_rhas_ternary_] = {};
$self->[_ris_excluded_lp_container_] = {};
$self->[_rlp_object_by_seqno_] = {};
$self->[_rwant_reduced_ci_] = {};
$self->[_rno_xci_by_seqno_] = {};
$self->[_rbrace_left_] = {};
$self->[_ris_bli_container_] = {};
$self->[_rparent_of_seqno_] = {};
$self->[_rchildren_of_seqno_] = {};
$self->[_ris_list_by_seqno_] = {};
$self->[_ris_cuddled_closing_brace_] = {};
$self->[_rbreak_container_] = {}; # prevent one-line blocks
$self->[_rshort_nested_] = {}; # blocks not forced open
$self->[_length_function_] = $length_function;
$self->[_is_encoded_data_] = $is_encoded_data;
# Some objects...
$self->[_fh_tee_] = $fh_tee;
$self->[_sink_object_] = $sink_object;
$self->[_file_writer_object_] = $file_writer_object;
$self->[_vertical_aligner_object_] = $vertical_aligner_object;
$self->[_logger_object_] = $logger_object;
# Reference to the batch being processed
$self->[_this_batch_] = [];
# Memory of processed text...
$self->[_ris_special_identifier_token_] = {};
$self->[_last_line_leading_level_] = 0;
$self->[_last_line_leading_type_] = '#';
$self->[_last_output_short_opening_token_] = 0;
$self->[_added_semicolon_count_] = 0;
$self->[_first_added_semicolon_at_] = 0;
$self->[_last_added_semicolon_at_] = 0;
$self->[_deleted_semicolon_count_] = 0;
$self->[_first_deleted_semicolon_at_] = 0;
$self->[_last_deleted_semicolon_at_] = 0;
$self->[_embedded_tab_count_] = 0;
$self->[_first_embedded_tab_at_] = 0;
$self->[_last_embedded_tab_at_] = 0;
$self->[_first_tabbing_disagreement_] = 0;
$self->[_last_tabbing_disagreement_] = 0;
$self->[_tabbing_disagreement_count_] = 0;
$self->[_in_tabbing_disagreement_] = 0;
$self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
$self->[_saw_END_or_DATA_] = 0;
$self->[_first_brace_tabbing_disagreement_] = undef;
$self->[_in_brace_tabbing_disagreement_] = undef;
# Hashes related to container welding...
$self->[_radjusted_levels_] = [];
# Weld data structures
$self->[_rK_weld_left_] = {};
$self->[_rK_weld_right_] = {};
$self->[_rweld_len_right_at_K_] = {};
# -xci stuff
$self->[_rseqno_controlling_my_ci_] = {};
$self->[_ris_seqno_controlling_ci_] = {};
$self->[_rspecial_side_comment_type_] = {};
$self->[_maximum_level_] = 0;
$self->[_maximum_level_at_line_] = 0;
$self->[_maximum_BLOCK_level_] = 0;
$self->[_maximum_BLOCK_level_at_line_] = 0;
$self->[_rKrange_code_without_comments_] = [];
$self->[_rbreak_before_Kfirst_] = {};
$self->[_rbreak_after_Klast_] = {};
$self->[_converged_] = 0;
# qw stuff
$self->[_rstarting_multiline_qw_seqno_by_K_] = {};
$self->[_rending_multiline_qw_seqno_by_K_] = {};
$self->[_rKrange_multiline_qw_by_seqno_] = {};
$self->[_rmultiline_qw_has_extra_level_] = {};
$self->[_rcollapsed_length_by_seqno_] = {};
$self->[_rbreak_before_container_by_seqno_] = {};
$self->[_roverride_cab3_] = {};
$self->[_ris_assigned_structure_] = {};
$self->[_ris_short_broken_eval_block_] = {};
$self->[_ris_bare_trailing_comma_by_seqno_] = {};
$self->[_rseqno_non_indenting_brace_by_ix_] = {};
$self->[_rmax_vertical_tightness_] = {};
$self->[_no_vertical_tightness_flags_] = 0;
# This flag will be updated later by a call to get_save_logfile()
$self->[_save_logfile_] = defined($logger_object);
# Be sure all variables in $self have been initialized above. To find the
# correspondence of index numbers and array names, copy a list to a file
# and use the unix 'nl' command to number lines 1..
if (DEVEL_MODE) {
my @non_existant;
foreach ( 0 .. _LAST_SELF_INDEX_ ) {
if ( !exists( $self->[$_] ) ) {
push @non_existant, $_;
}
}
if (@non_existant) {
Fault("These indexes in self not initialized: (@non_existant)\n");
}
}
bless $self, $class;
# Safety check..this is not a class yet
if ( _increment_count() > 1 ) {
confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
return $self;
} ## end sub new
######################################
# CODE SECTION 2: Some Basic Utilities
######################################
sub check_rLL {
# Verify that the rLL array has not been auto-vivified
my ( $self, $msg ) = @_;
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $num = @{$rLL};
if ( ( defined($Klimit) && $Klimit != $num - 1 )
|| ( !defined($Klimit) && $num > 0 ) )
{
# This fault can occur if the array has been accessed for an index
# greater than $Klimit, which is the last token index. Just accessing
# the array above index $Klimit, not setting a value, can cause @rLL to
# increase beyond $Klimit. If this occurs, the problem can be located
# by making calls to this routine at different locations in
# sub 'finish_formatting'.
$Klimit = 'undef' if ( !defined($Klimit) );
$msg = EMPTY_STRING unless $msg;
Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
}
return;
} ## end sub check_rLL
sub check_keys {
my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
# Check the keys of a hash:
# $rtest = ref to hash to test
# $rvalid = ref to hash with valid keys
# $msg = a message to write in case of error
# $exact_match defines the type of check:
# = false: test hash must not have unknown key
# = true: test hash must have exactly same keys as known hash
my @unknown_keys =
grep { !exists $rvalid->{$_} } keys %{$rtest};
my @missing_keys =
grep { !exists $rtest->{$_} } keys %{$rvalid};
my $error = @unknown_keys;
if ($exact_match) { $error ||= @missing_keys }
if ($error) {
local $LIST_SEPARATOR = ')(';
my @expected_keys = sort keys %{$rvalid};
@unknown_keys = sort @unknown_keys;
Fault(<<EOM);
------------------------------------------------------------------------
Program error detected checking hash keys
Message is: '$msg'
Expected keys: (@expected_keys)
Unknown key(s): (@unknown_keys)
Missing key(s): (@missing_keys)
------------------------------------------------------------------------
EOM
}
return;
} ## end sub check_keys
sub check_token_array {
my $self = shift;
# Check for errors in the array of tokens. This is only called
# when the DEVEL_MODE flag is set, so this Fault will only occur
# during code development.
my $rLL = $self->[_rLL_];
foreach my $KK ( 0 .. @{$rLL} - 1 ) {
my $nvars = @{ $rLL->[$KK] };
if ( $nvars != _NVARS ) {
my $NVARS = _NVARS;
my $type = $rLL->[$KK]->[_TYPE_];
$type = '*' unless defined($type);
# The number of variables per token node is _NVARS and was set when
# the array indexes were generated. So if the number of variables
# is different we have done something wrong, like not store all of
# them in sub 'write_line' when they were received from the
# tokenizer.
Fault(
"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
);
}
foreach my $var ( _TOKEN_, _TYPE_ ) {
if ( !defined( $rLL->[$KK]->[$var] ) ) {
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
# This is a simple check that each token has some basic
# variables. In other words, that there are no holes in the
# array of tokens. Sub 'write_line' pushes tokens into the
# $rLL array, so this should guarantee no gaps.
Fault("Undefined variable $var for K=$KK, line=$iline\n");
}
}
}
return;
} ## end sub check_token_array
{ ## begin closure check_line_hashes
# This code checks that no auto-vivification occurs in the 'line' hash
my %valid_line_hash;
BEGIN {
# These keys are defined for each line in the formatter
# Each line must have exactly these quantities
my @valid_line_keys = qw(
_curly_brace_depth
_ending_in_quote
_guessed_indentation_level
_line_number
_line_text
_line_type
_paren_depth
_quote_character
_rK_range
_square_bracket_depth
_starting_in_quote
_ended_in_blank_token
_code_type
_ci_level_0
_level_0
_nesting_blocks_0
_nesting_tokens_0
);
@valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
} ## end BEGIN
sub check_line_hashes {
my $self = shift;
my $rlines = $self->[_rlines_];
foreach my $rline ( @{$rlines} ) {
my $iline = $rline->{_line_number};
my $line_type = $rline->{_line_type};
check_keys( $rline, \%valid_line_hash,
"Checkpoint: line number =$iline, line_type=$line_type", 1 );
}
return;
} ## end sub check_line_hashes
} ## end closure check_line_hashes
{ ## begin closure for logger routines
my $logger_object;
# Called once per file to initialize the logger object
sub set_logger_object {
$logger_object = shift;
return;
}
sub get_logger_object {
return $logger_object;
}
sub get_input_stream_name {
my $input_stream_name = EMPTY_STRING;
if ($logger_object) {
$input_stream_name = $logger_object->get_input_stream_name();
}
return $input_stream_name;
} ## end sub get_input_stream_name
# interface to Perl::Tidy::Logger routines
sub warning {
my ( $msg, $msg_line_number ) = @_;
if ($logger_object) {
$logger_object->warning( $msg, $msg_line_number );
}
return;
}
sub complain {
my ( $msg, $msg_line_number ) = @_;
if ($logger_object) {
$logger_object->complain( $msg, $msg_line_number );
}
return;
} ## end sub complain
sub write_logfile_entry {
my @msg = @_;
if ($logger_object) {
$logger_object->write_logfile_entry(@msg);
}
return;
} ## end sub write_logfile_entry
sub get_saw_brace_error {
if ($logger_object) {
return $logger_object->get_saw_brace_error();
}
return;
} ## end sub get_saw_brace_error
sub we_are_at_the_last_line {
if ($logger_object) {
$logger_object->we_are_at_the_last_line();
}
return;
} ## end sub we_are_at_the_last_line
} ## end closure for logger routines
{ ## begin closure for diagnostics routines
my $diagnostics_object;
# Called once per file to initialize the diagnostics object
sub set_diagnostics_object {
$diagnostics_object = shift;
return;
}
# Available for debugging but not currently used:
sub write_diagnostics {
my ( $msg, $line_number ) = @_;
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics( $msg, $line_number );
}
return;
} ## end sub write_diagnostics
} ## end closure for diagnostics routines
sub get_convergence_check {
my ($self) = @_;
return $self->[_converged_];
}
sub get_output_line_number {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
return $vao->get_output_line_number();
}
sub want_blank_line {
my $self = shift;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->want_blank_line();
return;
} ## end sub want_blank_line
sub write_unindented_line {
my ( $self, $line ) = @_;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_line($line);
return;
} ## end sub write_unindented_line
sub consecutive_nonblank_lines {
my ($self) = @_;
my $file_writer_object = $self->[_file_writer_object_];
my $vao = $self->[_vertical_aligner_object_];
return $file_writer_object->get_consecutive_nonblank_lines() +
$vao->get_cached_line_count();
} ## end sub consecutive_nonblank_lines
sub split_words {
# given a string containing words separated by whitespace,
# return the list of words
my ($str) = @_;
return unless $str;
$str =~ s/\s+$//;
$str =~ s/^\s+//;
return split( /\s+/, $str );
} ## end sub split_words
###########################################
# CODE SECTION 3: Check and process options
###########################################
sub check_options {
# This routine is called to check the user-supplied run parameters
# and to configure the control hashes to them.
$rOpts = shift;
$controlled_comma_style = 0;
initialize_whitespace_hashes();
initialize_bond_strength_hashes();
# This function must be called early to get hashes with grep initialized
initialize_grep_and_friends();
# Make needed regex patterns for matching text.
# NOTE: sub_matching_patterns must be made first because later patterns use
# them; see RT #133130.
make_sub_matching_pattern(); # must be first pattern made
make_static_block_comment_pattern();
make_static_side_comment_pattern();
make_closing_side_comment_prefix();
make_closing_side_comment_list_pattern();
$format_skipping_pattern_begin =
make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
$format_skipping_pattern_end =
make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
make_non_indenting_brace_pattern();
# If closing side comments ARE selected, then we can safely
# delete old closing side comments unless closing side comment
# warnings are requested. This is a good idea because it will
# eliminate any old csc's which fall below the line count threshold.
# We cannot do this if warnings are turned on, though, because we
# might delete some text which has been added. So that must
# be handled when comments are created. And we cannot do this
# with -io because -csc will be skipped altogether.
if ( $rOpts->{'closing-side-comments'} ) {
if ( !$rOpts->{'closing-side-comment-warnings'}
&& !$rOpts->{'indent-only'} )
{
$rOpts->{'delete-closing-side-comments'} = 1;
}
}
# If closing side comments ARE NOT selected, but warnings ARE
# selected and we ARE DELETING csc's, then we will pretend to be
# adding with a huge interval. This will force the comments to be
# generated for comparison with the old comments, but not added.
elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
if ( $rOpts->{'delete-closing-side-comments'} ) {
$rOpts->{'delete-closing-side-comments'} = 0;
$rOpts->{'closing-side-comments'} = 1;
$rOpts->{'closing-side-comment-interval'} = 100_000_000;
}
}
else {
## ok - no -csc issues
}
my $comment = $rOpts->{'add-missing-else-comment'};
if ( !$comment ) {
$comment = "##FIXME - added with perltidy -ame";
}
else {
$comment = substr( $comment, 0, 60 );
$comment =~ s/^\s+//;
$comment =~ s/\s+$//;
$comment =~ s/\n/ /g;
if ( substr( $comment, 0, 1 ) ne '#' ) {
$comment = '#' . $comment;
}
}
$rOpts->{'add-missing-else-comment'} = $comment;
make_bli_pattern();
make_bl_pattern();
make_block_brace_vertical_tightness_pattern();
make_blank_line_pattern();
make_keyword_group_list_pattern();
prepare_cuddled_block_types();
if ( $rOpts->{'dump-cuddled-block-list'} ) {
dump_cuddled_block_list(*STDOUT);
Exit(0);
}
# -xlp implies -lp
if ( $rOpts->{'extended-line-up-parentheses'} ) {
$rOpts->{'line-up-parentheses'} ||= 1;
}
if ( $rOpts->{'line-up-parentheses'} ) {
if ( $rOpts->{'indent-only'}
|| !$rOpts->{'add-newlines'}
|| !$rOpts->{'delete-old-newlines'} )
{
Warn(<<EOM);
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
The -lp indentation logic requires that perltidy be able to coordinate
arbitrarily large numbers of line breakpoints. This isn't possible
with these flags.
-----------------------------------------------------------------------
EOM
$rOpts->{'line-up-parentheses'} = 0;
$rOpts->{'extended-line-up-parentheses'} = 0;
}
if ( $rOpts->{'whitespace-cycle'} ) {
Warn(<<EOM);
Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
EOM
$rOpts->{'whitespace-cycle'} = 0;
}
}
# At present, tabs are not compatible with the line-up-parentheses style
# (it would be possible to entab the total leading whitespace
# just prior to writing the line, if desired).
if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
# Likewise, tabs are not compatible with outdenting..
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( !$rOpts->{'space-for-semicolon'} ) {
$want_left_space{'f'} = -1;
}
if ( $rOpts->{'space-terminal-semicolon'} ) {
$want_left_space{';'} = 1;
}
# We should put an upper bound on any -sil=n value. Otherwise enormous
# files could be created by mistake.
for ( $rOpts->{'starting-indentation-level'} ) {
if ( $_ && $_ > 100 ) {
Warn(<<EOM);
The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
EOM
$_ = 0;
}
}
# Require -msp > 0 to avoid future parsing problems (issue c147)
for ( $rOpts->{'minimum-space-to-comment'} ) {
if ( !$_ || $_ <= 0 ) { $_ = 1 }
}
# implement outdenting preferences for keywords
%outdent_keyword = ();
my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
if ( !@okw ) {
@okw = qw(next last redo goto return); # defaults
}
# FUTURE: if not a keyword, assume that it is an identifier
foreach (@okw) {
if ( Perl::Tidy::Tokenizer::is_keyword($_) ) {
$outdent_keyword{$_} = 1;
}
else {
Warn("ignoring '$_' in -okwl list; not a perl keyword");
}
}
# setup hash for -kpit option
%keyword_paren_inner_tightness = ();
my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
if ( defined($kpit_value) && $kpit_value != 1 ) {
my @kpit =
split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
if ( !@kpit ) {
@kpit = qw(if elsif unless while until for foreach); # defaults
}
# we will allow keywords and user-defined identifiers
foreach (@kpit) {
$keyword_paren_inner_tightness{$_} = $kpit_value;
}
}
# implement user whitespace preferences
if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
@want_left_space{@q} = (1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
@want_right_space{@q} = (1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
@want_left_space{@q} = (-1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
@want_right_space{@q} = (-1) x scalar(@q);
}
if ( $rOpts->{'dump-want-left-space'} ) {
dump_want_left_space(*STDOUT);
Exit(0);
}
if ( $rOpts->{'dump-want-right-space'} ) {
dump_want_right_space(*STDOUT);
Exit(0);
}
initialize_space_after_keyword();
initialize_extended_block_tightness_list();
initialize_token_break_preferences();
#--------------------------------------------------------------
# The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
#--------------------------------------------------------------
# The -vmll and -lp parameters do not really work well together.
# To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
# NOTE: we could make this more precise by looking at any exclusion
# flags for -lp, and allowing -bbx=2 for excluded types.
if ( $rOpts->{'variable-maximum-line-length'}
&& $rOpts->{'ignore-old-breakpoints'}
&& $rOpts->{'line-up-parentheses'} )
{
my @changed;
foreach my $key ( keys %break_before_container_types ) {
if ( $break_before_container_types{$key} == 2 ) {
$break_before_container_types{$key} = 1;
push @changed, $key;
}
}
if (@changed) {
# we could write a warning here
}
}
#-----------------------------------------------------------
# The combination -lp -vmll can be unstable if -ci<2 (b1267)
#-----------------------------------------------------------
# The -vmll and -lp parameters do not really work well together.
# This is a very crude fix for an unusual parameter combination.
if ( $rOpts->{'variable-maximum-line-length'}
&& $rOpts->{'line-up-parentheses'}
&& $rOpts->{'continuation-indentation'} < 2 )
{
$rOpts->{'continuation-indentation'} = 2;
##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
}
#-----------------------------------------------------------
# The combination -lp -vmll -atc -dtc can be unstable
#-----------------------------------------------------------
# This fixes b1386 b1387 b1388 which had -wtc='b'
# Updated to to include any -wtc to fix b1426
if ( $rOpts->{'variable-maximum-line-length'}
&& $rOpts->{'line-up-parentheses'}
&& $rOpts->{'add-trailing-commas'}
&& $rOpts->{'delete-trailing-commas'}
&& $rOpts->{'want-trailing-commas'} )
{
$rOpts->{'delete-trailing-commas'} = 0;
## Issuing a warning message causes trouble with test cases, and this combo is
## so rare that it is unlikely to not occur in practice. So skip warning.
## Warn(
##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n"
## );
}
%container_indentation_options = ();
foreach my $pair (
[ 'break-before-hash-brace-and-indent', '{' ],
[ 'break-before-square-bracket-and-indent', '[' ],
[ 'break-before-paren-and-indent', '(' ],
)
{
my ( $key, $tok ) = @{$pair};
my $opt = $rOpts->{$key};
if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
{
# (1) -lp is not compatible with opt=2, silently set to opt=0
# (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
# (3) set opt=0 if -i < -ci (can be unstable, case b1355)
if ( $opt == 2 ) {
if (
$rOpts->{'line-up-parentheses'}
|| ( $rOpts->{'indent-columns'} <=
$rOpts->{'continuation-indentation'} )
)
{
$opt = 0;
}
}
$container_indentation_options{$tok} = $opt;
}
}
$right_bond_strength{'{'} = WEAK;
$left_bond_strength{'{'} = VERY_STRONG;
# make -l=0 equal to -l=infinite
if ( !$rOpts->{'maximum-line-length'} ) {
$rOpts->{'maximum-line-length'} = 1_000_000;
}
# make -lbl=0 equal to -lbl=infinite
if ( !$rOpts->{'long-block-line-count'} ) {
$rOpts->{'long-block-line-count'} = 1_000_000;
}
# hashes used to simplify setting whitespace
%tightness = (
'{' => $rOpts->{'brace-tightness'},
'}' => $rOpts->{'brace-tightness'},
'(' => $rOpts->{'paren-tightness'},
')' => $rOpts->{'paren-tightness'},
'[' => $rOpts->{'square-bracket-tightness'},
']' => $rOpts->{'square-bracket-tightness'},
);
if ( $rOpts->{'ignore-old-breakpoints'} ) {
my @conflicts;
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
$rOpts->{'break-at-old-method-breakpoints'} = 0;
push @conflicts, '--break-at-old-method-breakpoints (-bom)';
}
if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
$rOpts->{'break-at-old-comma-breakpoints'} = 0;
push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
}
if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
$rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
}
if ( $rOpts->{'keep-old-breakpoints-before'} ) {
$rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
push @conflicts, '--keep-old-breakpoints-before (-kbb)';
}
if ( $rOpts->{'keep-old-breakpoints-after'} ) {
$rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
push @conflicts, '--keep-old-breakpoints-after (-kba)';
}
if (@conflicts) {
my $msg = join( "\n ",
" Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
@conflicts )
. "\n";
Warn($msg);
}
# Note: These additional parameters are made inactive by -iob.
# They are silently turned off here because they are on by default.
# We would generate unexpected warnings if we issued a warning.
$rOpts->{'break-at-old-keyword-breakpoints'} = 0;
$rOpts->{'break-at-old-logical-breakpoints'} = 0;
$rOpts->{'break-at-old-ternary-breakpoints'} = 0;
$rOpts->{'break-at-old-attribute-breakpoints'} = 0;
}
%keep_break_before_type = ();
initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
'kbb', \%keep_break_before_type );
%keep_break_after_type = ();
initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
'kba', \%keep_break_after_type );
# Modify %keep_break_before and %keep_break_after to avoid conflicts
# with %want_break_before; fixes b1436.
# This became necessary after breaks for some tokens were converted
# from hard to soft (see b1433).
# We could do this for all tokens, but to minimize changes to existing
# code we currently only do this for the soft break tokens.
foreach my $key ( keys %keep_break_before_type ) {
if ( defined( $want_break_before{$key} )
&& !$want_break_before{$key}
&& $is_soft_keep_break_type{$key} )
{
$keep_break_after_type{$key} = $keep_break_before_type{$key};
delete $keep_break_before_type{$key};
}
}
foreach my $key ( keys %keep_break_after_type ) {
if ( defined( $want_break_before{$key} )
&& $want_break_before{$key}
&& $is_soft_keep_break_type{$key} )
{
$keep_break_before_type{$key} = $keep_break_after_type{$key};
delete $keep_break_after_type{$key};
}
}
$controlled_comma_style ||= $keep_break_before_type{','};
$controlled_comma_style ||= $keep_break_after_type{','};
initialize_global_option_vars();
initialize_line_length_vars(); # after 'initialize_global_option_vars'
initialize_trailing_comma_rules(); # after 'initialize_line_length_vars'
initialize_weld_nested_exclusion_rules();
initialize_weld_fat_comma_rules();
%line_up_parentheses_control_hash = ();
$line_up_parentheses_control_is_lxpl = 1;
my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
if ( $lpxl && $lpil ) {
Warn( <<EOM );
You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
EOM
}
if ($lpxl) {
$line_up_parentheses_control_is_lxpl = 1;
initialize_line_up_parentheses_control_hash(
$rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
}
elsif ($lpil) {
$line_up_parentheses_control_is_lxpl = 0;
initialize_line_up_parentheses_control_hash(
$rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
}
else {
## ok - neither -lpxl nor -lpil
}
return;
} ## end sub check_options
use constant ALIGN_GREP_ALIASES => 0;
sub initialize_grep_and_friends {
# Initialize or re-initialize hashes with 'grep' and grep aliases. This
# must be done after each set of options because new grep aliases may be
# used.
# re-initialize the hashes ... this is critical!
%is_sort_map_grep = ();
my @q = qw(sort map grep);
@is_sort_map_grep{@q} = (1) x scalar(@q);
my $olbxl = $rOpts->{'one-line-block-exclusion-list'};
my %is_olb_exclusion_word;
if ( defined($olbxl) ) {
my @list = split_words($olbxl);
if (@list) {
@is_olb_exclusion_word{@list} = (1) x scalar(@list);
}
}
# Make the list of block types which may be re-formed into one line.
# They will be modified with the grep-alias-list below and
# by sub 'prepare_cuddled_block_types'.
# Note that it is essential to always re-initialize the hash here:
%want_one_line_block = ();
if ( !$is_olb_exclusion_word{'*'} ) {
foreach (qw(sort map grep eval)) {
if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 }
}
}
# Note that any 'grep-alias-list' string has been preprocessed to be a
# trimmed, space-separated list.
my $str = $rOpts->{'grep-alias-list'};
my @grep_aliases = split /\s+/, $str;
if (@grep_aliases) {
@{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
if ( $want_one_line_block{'grep'} ) {
@{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases);
}
}
##@q = qw(sort map grep eval);
%is_sort_map_grep_eval = %is_sort_map_grep;
$is_sort_map_grep_eval{'eval'} = 1;
##@q = qw(sort map grep eval do);
%is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
$is_sort_map_grep_eval_do{'do'} = 1;
# These block types can take ci. This is used by the -xci option.
# Note that the 'sub' in this list is an anonymous sub. To be more correct
# we could remove sub and use ASUB pattern to also handle a
# prototype/signature. But that would slow things down and would probably
# never be useful.
##@q = qw( do sub eval sort map grep );
%is_block_with_ci = %is_sort_map_grep_eval_do;
$is_block_with_ci{'sub'} = 1;
%is_keyword_returning_list = ();
@q = qw(
grep
keys
map
reverse
sort
split
);
push @q, @grep_aliases;
@is_keyword_returning_list{@q} = (1) x scalar(@q);
# This code enables vertical alignment of grep aliases for testing. It has
# not been found to be beneficial, so it is off by default. But it is
# useful for precise testing of the grep alias coding.
if (ALIGN_GREP_ALIASES) {
%block_type_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'if',
'default' => 'if',
'case' => 'if',
'sort' => 'map',
'grep' => 'map',
);
foreach (@q) {
$block_type_map{$_} = 'map' unless ( $_ eq 'map' );
}
}
return;
} ## end sub initialize_grep_and_friends
sub initialize_weld_nested_exclusion_rules {
%weld_nested_exclusion_rules = ();
my $opt_name = 'weld-nested-exclusion-list';
my $str = $rOpts->{$opt_name};
return unless ($str);
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return unless ($str);
# There are four container tokens.
my %token_keys = (
'(' => '(',
'[' => '[',
'{' => '{',
'q' => 'q',
);
# We are parsing an exclusion list for nested welds. The list is a string
# with spaces separating any number of items. Each item consists of three
# pieces of information:
# <optional position> <optional type> <type of container>
# < ^ or . > < k or K > < ( [ { >
# The last character is the required container type and must be one of:
# ( = paren
# [ = square bracket
# { = brace
# An optional leading position indicator:
# ^ means the leading token position in the weld
# . means a secondary token position in the weld
# no position indicator means all positions match
# An optional alphanumeric character between the position and container
# token selects to which the rule applies:
# k = any keyword
# K = any non-keyword
# f = function call
# F = not a function call
# w = function or keyword
# W = not a function or keyword
# no letter means any preceding type matches
# Examples:
# ^( - the weld must not start with a paren
# .( - the second and later tokens may not be parens
# ( - no parens in weld
# ^K( - exclude a leading paren not preceded by a keyword
# .k( - exclude a secondary paren preceded by a keyword
# [ { - exclude all brackets and braces
my @items = split /\s+/, $str;
my $msg1;
my $msg2;
foreach my $item (@items) {
my $item_save = $item;
my $tok = chop($item);
my $key = $token_keys{$tok};
if ( !defined($key) ) {
$msg1 .= " '$item_save'";
next;
}
if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
$weld_nested_exclusion_rules{$key} = [];
}
my $rflags = $weld_nested_exclusion_rules{$key};
# A 'q' means do not weld quotes
if ( $tok eq 'q' ) {
$rflags->[0] = '*';
$rflags->[1] = '*';
next;
}
my $pos = '*';
my $select = '*';
if ($item) {
if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
$pos = $1 if ($1);
$select = $2 if ($2);
}
else {
$msg1 .= " '$item_save'";
next;
}
}
my $err;
if ( $pos eq '^' || $pos eq '*' ) {
if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
$err = 1;
}
$rflags->[0] = $select;
}
if ( $pos eq '.' || $pos eq '*' ) {
if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
$err = 1;
}
$rflags->[1] = $select;
}
if ($err) { $msg2 .= " '$item_save'"; }
}
if ($msg1) {
Warn(<<EOM);
Unexpecting symbol(s) encountered in --$opt_name will be ignored:
$msg1
EOM
}
if ($msg2) {
Warn(<<EOM);
Multiple specifications were encountered in the --weld-nested-exclusion-list for:
$msg2
Only the last will be used.
EOM
}
return;
} ## end sub initialize_weld_nested_exclusion_rules
sub initialize_weld_fat_comma_rules {
# Initialize a hash controlling which opening token types can be
# welded around a fat comma
%weld_fat_comma_rules = ();
# The -wfc flag turns on welding of '=>' after an opening paren
if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
# This could be generalized in the future by introducing a parameter
# -weld-fat-comma-after=str (-wfca=str), where str contains any of:
# * { [ (
# to indicate which opening parens may weld to a subsequent '=>'
# The flag -wfc would then be equivalent to -wfca='('
# This has not been done because it is not yet clear how useful
# this generalization would be.
return;
} ## end sub initialize_weld_fat_comma_rules
sub initialize_line_up_parentheses_control_hash {
my ( $str, $opt_name ) = @_;
return unless ($str);
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return unless ($str);
# The format is space separated items, where each item must consist of a
# string with a token type preceded by an optional text token and followed
# by an integer:
# For example:
# W(1
# = (flag1)(key)(flag2), where
# flag1 = 'W'
# key = '('
# flag2 = '1'
my @items = split /\s+/, $str;
my $msg1;
my $msg2;
foreach my $item (@items) {
my $item_save = $item;
my ( $flag1, $key, $flag2 );
if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
$flag1 = $1 if $1;
$key = $2 if $2;
$flag2 = $3 if $3;
}
else {
$msg1 .= " '$item_save'";
next;
}
if ( !defined($key) ) {
$msg1 .= " '$item_save'";
next;
}
# Check for valid flag1
if ( !defined($flag1) ) { $flag1 = '*' }
if ( $flag1 !~ /^[kKfFwW\*]$/ ) {
$msg1 .= " '$item_save'";
next;
}
# Check for valid flag2
# 0 or blank: ignore container contents
# 1 all containers with sublists match
# 2 all containers with sublists, code blocks or ternary operators match
# ... this could be extended in the future
if ( !defined($flag2) ) { $flag2 = 0 }
if ( $flag2 !~ /^[012]$/ ) {
$msg1 .= " '$item_save'";
next;
}
if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
$line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
next;
}
# check for multiple conflicting specifications
my $rflags = $line_up_parentheses_control_hash{$key};
my $err;
if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
$err = 1;
$rflags->[0] = $flag1;
}
if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
$err = 1;
$rflags->[1] = $flag2;
}
$msg2 .= " '$item_save'" if ($err);
next;
}
if ($msg1) {
Warn(<<EOM);
Unexpecting symbol(s) encountered in --$opt_name will be ignored:
$msg1
EOM
}
if ($msg2) {
Warn(<<EOM);
Multiple specifications were encountered in the $opt_name at:
$msg2
Only the last will be used.
EOM
}
# Speedup: we can turn off -lp if it is not actually used
if ($line_up_parentheses_control_is_lxpl) {
my $all_off = 1;
foreach my $key (qw# ( { [ #) {
my $rflags = $line_up_parentheses_control_hash{$key};
if ( defined($rflags) ) {
my ( $flag1, $flag2 ) = @{$rflags};
if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
if ($flag2) { $all_off = 0; last }
}
}
if ($all_off) {
$rOpts->{'line-up-parentheses'} = EMPTY_STRING;
}
}
return;
} ## end sub initialize_line_up_parentheses_control_hash
sub initialize_space_after_keyword {
# default keywords for which space is introduced before an opening paren
# (at present, including them messes up vertical alignment)
my @sak = qw(my local our and or xor err eq ne if else elsif until
unless while for foreach return switch case given when catch);
%space_after_keyword = map { $_ => 1 } @sak;
# first remove any or all of these if desired
if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
# -nsak='*' selects all the above keywords
if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
@space_after_keyword{@q} = (0) x scalar(@q);
}
# then allow user to add to these defaults
if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
@space_after_keyword{@q} = (1) x scalar(@q);
}
return;
} ## end sub initialize_space_after_keyword
sub initialize_extended_block_tightness_list {
# Setup the control hash for --extended-block-tightness
# keywords taking indirect objects:
my @k_list = keys %is_indirect_object_taker;
# type symbols which may precede an opening block brace
my @t_list = qw($ @ % & *);
push @t_list, '$#';
my @all = ( @k_list, @t_list );
# We will build the selection in %hash
# By default the option is 'on' for keywords only (-xbtl='k')
my %hash;
@hash{@k_list} = (1) x scalar(@k_list);
@hash{@t_list} = (0) x scalar(@t_list);
# This can be overridden with -xbtl="..."
my $long_name = 'extended-block-tightness-list';
if ( $rOpts->{$long_name} ) {
my @words = split_words( $rOpts->{$long_name} );
my @unknown;
# Turn everything off
@hash{@all} = (0) x scalar(@all);
# Then turn on selections
foreach my $word (@words) {
# 'print' etc turns on a specific word or symbol
if ( defined( $hash{$word} ) ) { $hash{$word} = 1; }
# 'k' turns on all keywords
elsif ( $word eq 'k' ) {
@hash{@k_list} = (1) x scalar(@k_list);
}
# 't' turns on all symbols
elsif ( $word eq 't' ) {
@hash{@t_list} = (1) x scalar(@t_list);
}
# 'kt' same as 'k' and 't' for convenience
elsif ( $word eq 'kt' ) {
@hash{@all} = (1) x scalar(@all);
}
# Anything else is an error
else { push @unknown, $word }
}
if (@unknown) {
my $num = @unknown;
local $LIST_SEPARATOR = SPACE;
Warn(<<EOM);
$num unrecognized keyword(s) were input with --$long_name :
@unknown
EOM
}
}
# Transfer the result to the global hash
%extended_block_tightness_list = %hash;
return;
} ## end sub initialize_extended_block_tightness_list
sub initialize_token_break_preferences {
# implement user break preferences
my $break_after = sub {
my @toks = @_;
foreach my $tok (@toks) {
if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
if ( $tok eq ',' ) { $controlled_comma_style = 1 }
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
return;
};
my $break_before = sub {
my @toks = @_;
foreach my $tok (@toks) {
if ( $tok eq ',' ) { $controlled_comma_style = 1 }
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
return;
};
$break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
$break_before->(@all_operators)
if ( $rOpts->{'break-before-all-operators'} );
$break_after->( split_words( $rOpts->{'want-break-after'} ) );
$break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
# Coordinate ?/: breaks, which must be similar
# The small strength 0.01 which is added is 1% of the strength of one
# indentation level and seems to work okay.
if ( !$want_break_before{':'} ) {
$want_break_before{'?'} = $want_break_before{':'};
$right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
$left_bond_strength{'?'} = NO_BREAK;
}
# Only make a hash entry for the next parameters if values are defined.
# That allows a quick check to be made later.
%break_before_container_types = ();
for ( $rOpts->{'break-before-hash-brace'} ) {
$break_before_container_types{'{'} = $_ if $_ && $_ > 0;
}
for ( $rOpts->{'break-before-square-bracket'} ) {
$break_before_container_types{'['} = $_ if $_ && $_ > 0;
}
for ( $rOpts->{'break-before-paren'} ) {
$break_before_container_types{'('} = $_ if $_ && $_ > 0;
}
return;
} ## end sub initialize_token_break_preferences
use constant DEBUG_KB => 0;
sub initialize_keep_old_breakpoints {
my ( $str, $short_name, $rkeep_break_hash ) = @_;
return unless $str;
my %flags = ();
my @list = split_words($str);
if ( DEBUG_KB && @list ) {
local $LIST_SEPARATOR = SPACE;
print <<EOM;
DEBUG_KB entering for '$short_name' with str=$str\n";
list is: @list;
EOM
}
# Ignore kbb='(' and '[' and '{': can cause unstable math formatting
# (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
# Also always ignore ? and : (b1440 and b1433-b1439)
if ( $short_name eq 'kbb' ) {
@list = grep { !m/[\(\[\{\?\:]/ } @list;
}
elsif ( $short_name eq 'kba' ) {
@list = grep { !m/[\)\]\}\?\:]/ } @list;
}
else {
Fault(<<EOM);
Bad call arg - received short name '$short_name' but expecting 'kbb' or 'kba'
EOM
}
# pull out any any leading container code, like f( or *{
# For example: 'f(' becomes flags hash entry '(' => 'f'
foreach my $item (@list) {
if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
$item = $2;
$flags{$2} = $1;
}
}
my @unknown_types;
foreach my $type (@list) {
if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
push @unknown_types, $type;
}
}
if (@unknown_types) {
my $num = @unknown_types;
local $LIST_SEPARATOR = SPACE;
Warn(<<EOM);
$num unrecognized token types were input with --$short_name :
@unknown_types
EOM
}
@{$rkeep_break_hash}{@list} = (1) x scalar(@list);
foreach my $key ( keys %flags ) {
my $flag = $flags{$key};
if ( length($flag) != 1 ) {
Warn(<<EOM);
Multiple entries given for '$key' in '$short_name'
EOM
}
elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
Warn(<<EOM);
Unknown flag '$flag' given for '$key' in '$short_name'
EOM
}
elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
Warn(<<EOM);
Unknown flag '$flag' given for '$key' in '$short_name'
EOM
}
else {
## ok - no error seen
}
$rkeep_break_hash->{$key} = $flag;
}
if ( DEBUG_KB && @list ) {
my @tmp = %flags;
local $LIST_SEPARATOR = SPACE;
print <<EOM;
DEBUG_KB -$short_name flag: $str
final keys: @list
special flags: @tmp
EOM
}
return;
} ## end sub initialize_keep_old_breakpoints
sub initialize_global_option_vars {
#------------------------------------------------------------
# Make global vars for frequently used options for efficiency
#------------------------------------------------------------
$rOpts_add_newlines = $rOpts->{'add-newlines'};
$rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
$rOpts_add_whitespace = $rOpts->{'add-whitespace'};
$rOpts_blank_lines_after_opening_block =
$rOpts->{'blank-lines-after-opening-block'};
$rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
$rOpts_block_brace_vertical_tightness =
$rOpts->{'block-brace-vertical-tightness'};
$rOpts_brace_follower_vertical_tightness =
$rOpts->{'brace-follower-vertical-tightness'};
$rOpts_break_after_labels = $rOpts->{'break-after-labels'};
$rOpts_break_at_old_attribute_breakpoints =
$rOpts->{'break-at-old-attribute-breakpoints'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
$rOpts_break_at_old_keyword_breakpoints =
$rOpts->{'break-at-old-keyword-breakpoints'};
$rOpts_break_at_old_logical_breakpoints =
$rOpts->{'break-at-old-logical-breakpoints'};
$rOpts_break_at_old_semicolon_breakpoints =
$rOpts->{'break-at-old-semicolon-breakpoints'};
$rOpts_break_at_old_ternary_breakpoints =
$rOpts->{'break-at-old-ternary-breakpoints'};
$rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
$rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
$rOpts_closing_side_comment_else_flag =
$rOpts->{'closing-side-comment-else-flag'};
$rOpts_closing_side_comment_maximum_text =
$rOpts->{'closing-side-comment-maximum-text'};
$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
$rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'};
$rOpts_delete_closing_side_comments =
$rOpts->{'delete-closing-side-comments'};
$rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
$rOpts_extended_continuation_indentation =
$rOpts->{'extended-continuation-indentation'};
$rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
$rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
$rOpts_delete_weld_interfering_commas =
$rOpts->{'delete-weld-interfering-commas'};
$rOpts_format_skipping = $rOpts->{'format-skipping'};
$rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
$rOpts_function_paren_vertical_alignment =
$rOpts->{'function-paren-vertical-alignment'};
$rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
$rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
$rOpts_ignore_side_comment_lengths =
$rOpts->{'ignore-side-comment-lengths'};
$rOpts_ignore_perlcritic_comments = $rOpts->{'ignore-perlcritic-comments'};
$rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_indent_only = $rOpts->{'indent-only'};
$rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
$rOpts_extended_block_tightness = $rOpts->{'extended-block-tightness'};
$rOpts_extended_line_up_parentheses =
$rOpts->{'extended-line-up-parentheses'};
$rOpts_logical_padding = $rOpts->{'logical-padding'};
$rOpts_maximum_consecutive_blank_lines =
$rOpts->{'maximum-consecutive-blank-lines'};
$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
$rOpts_opening_brace_always_on_right =
$rOpts->{'opening-brace-always-on-right'};
$rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
$rOpts_outdent_labels = $rOpts->{'outdent-labels'};
$rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
$rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
$rOpts_outdent_static_block_comments =
$rOpts->{'outdent-static-block-comments'};
$rOpts_recombine = $rOpts->{'recombine'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
$rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
$rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
$rOpts_static_block_comments = $rOpts->{'static-block-comments'};
$rOpts_add_missing_else = $rOpts->{'add-missing-else'};
$rOpts_warn_missing_else = $rOpts->{'warn-missing-else'};
$rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
$rOpts_tee_pod = $rOpts->{'tee-pod'};
$rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
$rOpts_valign_code = $rOpts->{'valign-code'};
$rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
$rOpts_valign_if_unless = $rOpts->{'valign-if-unless'};
$rOpts_variable_maximum_line_length =
$rOpts->{'variable-maximum-line-length'};
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
%opening_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness'},
'{' => $rOpts->{'brace-vertical-tightness'},
'[' => $rOpts->{'square-bracket-vertical-tightness'},
')' => $rOpts->{'paren-vertical-tightness'},
'}' => $rOpts->{'brace-vertical-tightness'},
']' => $rOpts->{'square-bracket-vertical-tightness'},
);
%closing_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness-closing'},
'{' => $rOpts->{'brace-vertical-tightness-closing'},
'[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
')' => $rOpts->{'paren-vertical-tightness-closing'},
'}' => $rOpts->{'brace-vertical-tightness-closing'},
']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
);
# assume flag for '>' same as ')' for closing qw quotes
%closing_token_indentation = (
')' => $rOpts->{'closing-paren-indentation'},
'}' => $rOpts->{'closing-brace-indentation'},
']' => $rOpts->{'closing-square-bracket-indentation'},
'>' => $rOpts->{'closing-paren-indentation'},
);
# flag indicating if any closing tokens are indented
$some_closing_token_indentation =
$rOpts->{'closing-paren-indentation'}
|| $rOpts->{'closing-brace-indentation'}
|| $rOpts->{'closing-square-bracket-indentation'}
|| $rOpts->{'indent-closing-brace'};
%opening_token_right = (
'(' => $rOpts->{'opening-paren-right'},
'{' => $rOpts->{'opening-hash-brace-right'},
'[' => $rOpts->{'opening-square-bracket-right'},
);
%stack_opening_token = (
'(' => $rOpts->{'stack-opening-paren'},
'{' => $rOpts->{'stack-opening-hash-brace'},
'[' => $rOpts->{'stack-opening-square-bracket'},
);
%stack_closing_token = (
')' => $rOpts->{'stack-closing-paren'},
'}' => $rOpts->{'stack-closing-hash-brace'},
']' => $rOpts->{'stack-closing-square-bracket'},
);
return;
} ## end sub initialize_global_option_vars
sub initialize_line_length_vars {
# Create a table of maximum line length vs level for later efficient use.
# We will make the tables very long to be sure it will not be exceeded.
# But we have to choose a fixed length. A check will be made at the start
# of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
# my standard test problems have indentation levels of about 150, so this
# should be fairly large. If the choice of a maximum level ever becomes
# an issue then these table values could be returned in a sub with a simple
# memoization scheme.
# Also create a table of the maximum spaces available for text due to the
# level only. If a line has continuation indentation, then that space must
# be subtracted from the table value. This table is used for preliminary
# estimates in welding, extended_ci, BBX, and marking short blocks.
use constant LEVEL_TABLE_MAX => 1000;
# The basic scheme:
foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
my $indent = $level * $rOpts_indent_columns;
$maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
$maximum_text_length_at_level[$level] =
$rOpts_maximum_line_length - $indent;
}
# Correct the maximum_text_length table if the -wc=n flag is used
$rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
if ($rOpts_whitespace_cycle) {
if ( $rOpts_whitespace_cycle > 0 ) {
foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
my $level_mod = $level % $rOpts_whitespace_cycle;
my $indent = $level_mod * $rOpts_indent_columns;
$maximum_text_length_at_level[$level] =
$rOpts_maximum_line_length - $indent;
}
}
else {
$rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
}
}
# Correct the tables if the -vmll flag is used. These values override the
# previous values.
if ($rOpts_variable_maximum_line_length) {
foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
$maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
$maximum_line_length_at_level[$level] =
$rOpts_maximum_line_length + $level * $rOpts_indent_columns;
}
}
# Define two measures of indentation level, alpha and beta, at which some
# formatting features come under stress and need to start shutting down.
# Some combination of the two will be used to shut down different
# formatting features.
# Put a reasonable upper limit on stress level (say 100) in case the
# whitespace-cycle variable is used.
my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
# Find stress_level_alpha, targeted at very short maximum line lengths.
$stress_level_alpha = $stress_level_limit + 1;
foreach my $level_test ( 0 .. $stress_level_limit ) {
my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
my $excess_inside_space =
$max_len -
$rOpts_continuation_indentation -
$rOpts_indent_columns - 8;
if ( $excess_inside_space <= 0 ) {
$stress_level_alpha = $level_test;
last;
}
}
# Find stress level beta, a stress level targeted at formatting
# at deep levels near the maximum line length. We start increasing
# from zero and stop at the first level which shows no more space.
# 'const' is a fixed number of spaces for a typical variable.
# Cases b1197-b1204 work ok with const=12 but not with const=8
my $const = 16;
my $denom = max( 1, $rOpts_indent_columns );
$stress_level_beta = 0;
foreach my $level ( 0 .. $stress_level_limit ) {
my $remaining_cycles = max(
0,
(
$maximum_text_length_at_level[$level] -
$rOpts_continuation_indentation - $const
) / $denom
);
last if ( $remaining_cycles <= 3 ); # 2 does not work
$stress_level_beta = $level;
}
# This is a combined level which works well for turning off formatting
# features in most cases:
$high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
return;
} ## end sub initialize_line_length_vars
sub initialize_trailing_comma_rules {
# Setup control hash for trailing commas
# -wtc=s defines desired trailing comma policy:
#
# =" " stable
# [ both -atc and -dtc ignored ]
# =0 : none
# [requires -dtc; -atc ignored]
# =1 or * : all
# [requires -atc; -dtc ignored]
# =m : multiline lists require trailing comma
# if -atc set => will add missing multiline trailing commas
# if -dtc set => will delete trailing single line commas
# =b or 'bare' (multiline) lists require trailing comma
# if -atc set => will add missing bare trailing commas
# if -dtc set => will delete non-bare trailing commas
# =h or 'hash': single column stable bare lists require trailing comma
# if -atc set will add these
# if -dtc set will delete other trailing commas
#-------------------------------------------------------------------
# This routine must be called after the alpha and beta stress levels
# have been defined in sub 'initialize_line_length_vars'.
#-------------------------------------------------------------------
%trailing_comma_rules = ();
my $rvalid_flags = [qw(0 1 * m b h i)];
my $option = $rOpts->{'want-trailing-commas'};
if ($option) {
$option =~ s/^\s+//;
$option =~ s/\s+$//;
}
# We need to use length() here because '0' is a possible option
if ( defined($option) && length($option) ) {
my $error_message;
my %rule_hash;
my @q = @{$rvalid_flags};
my %is_valid_flag;
@is_valid_flag{@q} = (1) x scalar(@q);
# handle single character control, such as -wtc='b'
if ( length($option) == 1 ) {
foreach (qw< ) ] } >) {
$rule_hash{$_} = [ $option, EMPTY_STRING ];
}
}
# handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
else {
my @parts = split /\s+/, $option;
foreach my $part (@parts) {
if ( length($part) >= 2 && length($part) <= 3 ) {
my $val = substr( $part, -1, 1 );
my $key_o = substr( $part, -2, 1 );
if ( $is_opening_token{$key_o} ) {
my $paren_flag = EMPTY_STRING;
if ( length($part) == 3 ) {
$paren_flag = substr( $part, 0, 1 );
}
my $key = $matching_token{$key_o};
$rule_hash{$key} = [ $val, $paren_flag ];
}
else {
$error_message .= "Unrecognized term: '$part'\n";
}
}
else {
$error_message .= "Unrecognized term: '$part'\n";
}
}
}
# check for valid control characters
if ( !$error_message ) {
foreach my $key ( keys %rule_hash ) {
my $item = $rule_hash{$key};
my ( $val, $paren_flag ) = @{$item};
if ( $val && !$is_valid_flag{$val} ) {
my $valid_str = join( SPACE, @{$rvalid_flags} );
$error_message .=
"Unexpected value '$val'; must be one of: $valid_str\n";
last;
}
if ($paren_flag) {
if ( $paren_flag !~ /^[kKfFwW]$/ ) {
$error_message .=
"Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
last;
}
if ( $key ne ')' ) {
$error_message .=
"paren flag '$paren_flag' is only allowed before a '('\n";
last;
}
}
}
}
if ($error_message) {
Warn(<<EOM);
Error parsing --want-trailing-commas='$option':
$error_message
EOM
}
# Set the control hash if no errors
else {
%trailing_comma_rules = %rule_hash;
}
}
# Both adding and deleting commas can lead to instability in extreme cases
if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
# If the possible instability is significant, then we can turn off
# -dtc as a defensive measure to prevent it.
# We must turn off -dtc for very small values of --whitespace-cycle
# to avoid instability. A minimum value of -wc=3 fixes b1393, but a
# value of 4 is used here for safety. This parameter is seldom used,
# and much larger than this when used, so the cutoff value is not
# critical.
if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
$rOpts_delete_trailing_commas = 0;
}
}
return;
} ## end sub initialize_trailing_comma_rules
sub initialize_whitespace_hashes {
# This is called once before formatting begins to initialize these global
# hashes, which control the use of whitespace around tokens:
#
# %binary_ws_rules
# %want_left_space
# %want_right_space
# %space_after_keyword
#
# Many token types are identical to the tokens themselves.
# See the tokenizer for a complete list. Here are some special types:
# k = perl keyword
# f = semicolon in for statement
# m = unary minus
# p = unary plus
# Note that :: is excluded since it should be contained in an identifier
# Note that '->' is excluded because it never gets space
# parentheses and brackets are excluded since they are handled specially
# curly braces are included but may be overridden by logic, such as
# newline logic.
# NEW_TOKENS: create a whitespace rule here. This can be as
# simple as adding your new letter to @spaces_both_sides, for
# example.
# fix for c250: added space rules new package type 'P' and sub type 'S'
my @spaces_both_sides = qw#
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
&&= ||= //= <=> A k f w F n C Y U G v P S
#;
my @spaces_left_side = qw<
t ! ~ m p { \ h pp mm Z j
>;
push( @spaces_left_side, '#' ); # avoids warning message
my @spaces_right_side = qw<
; } ) ] R J ++ -- **=
>;
push( @spaces_right_side, ',' ); # avoids warning message
%want_left_space = ();
%want_right_space = ();
%binary_ws_rules = ();
# Note that we setting defaults here. Later in processing
# the values of %want_left_space and %want_right_space
# may be overridden by any user settings specified by the
# -wls and -wrs parameters. However the binary_whitespace_rules
# are hardwired and have priority.
@want_left_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_right_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_left_space{@spaces_left_side} =
(1) x scalar(@spaces_left_side);
@want_right_space{@spaces_left_side} =
(-1) x scalar(@spaces_left_side);
@want_left_space{@spaces_right_side} =
(-1) x scalar(@spaces_right_side);
@want_right_space{@spaces_right_side} =
(1) x scalar(@spaces_right_side);
$want_left_space{'->'} = WS_NO;
$want_right_space{'->'} = WS_NO;
$want_left_space{'**'} = WS_NO;
$want_right_space{'**'} = WS_NO;
$want_right_space{'CORE::'} = WS_NO;
# These binary_ws_rules are hardwired and have priority over the above
# settings. It would be nice to allow adjustment by the user,
# but it would be complicated to specify.
#
# hash type information must stay tightly bound
# as in : ${xxxx}
$binary_ws_rules{'i'}{'L'} = WS_NO;
$binary_ws_rules{'i'}{'{'} = WS_YES;
$binary_ws_rules{'k'}{'{'} = WS_YES;
$binary_ws_rules{'U'}{'{'} = WS_YES;
$binary_ws_rules{'i'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'L'} = WS_NO;
$binary_ws_rules{'R'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
$binary_ws_rules{'}'}{'L'} = WS_NO;
$binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'$'}{'{'} = WS_NO;
$binary_ws_rules{'@'}{'L'} = WS_NO;
$binary_ws_rules{'@'}{'{'} = WS_NO;
$binary_ws_rules{'='}{'L'} = WS_YES;
$binary_ws_rules{'J'}{'J'} = WS_YES;
# the following includes ') {'
# as in : if ( xxx ) { yyy }
$binary_ws_rules{']'}{'L'} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{')'}{'{'} = WS_YES;
$binary_ws_rules{')'}{'['} = WS_NO;
$binary_ws_rules{']'}{'['} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'['} = WS_NO;
$binary_ws_rules{']'}{'++'} = WS_NO;
$binary_ws_rules{']'}{'--'} = WS_NO;
$binary_ws_rules{')'}{'++'} = WS_NO;
$binary_ws_rules{')'}{'--'} = WS_NO;
$binary_ws_rules{'R'}{'++'} = WS_NO;
$binary_ws_rules{'R'}{'--'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'{'} = WS_YES;
return;
} ## end sub initialize_whitespace_hashes
{ #<<< begin closure set_whitespace_flags
my %is_special_ws_type;
my %is_wCUG;
my %is_wi;
BEGIN {
# The following hash is used to skip over needless if tests.
# Be sure to update it when adding new checks in its block.
my @q = qw(k w C m - Q);
push @q, '#';
@is_special_ws_type{@q} = (1) x scalar(@q);
# These hashes replace slower regex tests
@q = qw( w C U G );
@is_wCUG{@q} = (1) x scalar(@q);
@q = qw( w i );
@is_wi{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_WHITE => 0;
# Hashes to set spaces around container tokens according to their
# sequence numbers. These are set as keywords are examined.
# They are controlled by the -kpit and -kpitl flags.
my %opening_container_inside_ws;
my %closing_container_inside_ws;
sub set_whitespace_flags {
# This routine is called once per file to set whitespace flags for that
# file. This routine examines each pair of nonblank tokens and sets a flag
# indicating if white space is needed.
#
# $rwhitespace_flags->[$j] is a flag indicating whether a white space
# BEFORE token $j is needed, with the following values:
#
# WS_NO = -1 do not want a space BEFORE token $j
# WS_OPTIONAL= 0 optional space or $j is a whitespace
# WS_YES = 1 want a space BEFORE token $j
#
my $self = shift;
my $j_tight_closing_paren = -1;
my $rLL = $self->[_rLL_];
my $jmax = @{$rLL} - 1;
%opening_container_inside_ws = ();
%closing_container_inside_ws = ();
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
my $rwhitespace_flags = [];
my $ris_function_call_paren = {};
return $rwhitespace_flags if ( $jmax < 0 );
my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
my $last_token = SPACE;
my $last_type = 'b';
my $last_token_dbg = SPACE;
my $last_type_dbg = 'b';
my $rtokh_last = [ @{ $rLL->[0] } ];
$rtokh_last->[_TOKEN_] = $last_token;
$rtokh_last->[_TYPE_] = $last_type;
$rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING;
$rtokh_last->[_LINE_INDEX_] = 0;
my $rtokh_last_last = $rtokh_last;
# This will identify braces to be treated as blocks for the -xbt flag
my %block_type_for_tightness;
my ( $ws_1, $ws_2, $ws_3, $ws_4 );
# main loop over all tokens to define the whitespace flags
my $last_type_is_opening;
my ( $token, $type );
my $j = -1;
foreach my $rtokh ( @{$rLL} ) {
$j++;
$type = $rtokh->[_TYPE_];
if ( $type eq 'b' ) {
$rwhitespace_flags->[$j] = WS_OPTIONAL;
next;
}
$token = $rtokh->[_TOKEN_];
my $ws;
#---------------------------------------------------------------
# Whitespace Rules Section 1:
# Handle space on the inside of opening braces.
#---------------------------------------------------------------
# /^[L\{\(\[]$/
if ($last_type_is_opening) {
$last_type_is_opening = 0;
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
my $block_type = $rblock_type_of_seqno->{$seqno};
my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
my $last_block_type = $rblock_type_of_seqno->{$last_seqno}
|| $block_type_for_tightness{$last_seqno};
$j_tight_closing_paren = -1;
# let us keep empty matched braces together: () {} []
# except for BLOCKS
if ( $token eq $matching_token{$last_token} ) {
if ($block_type) {
$ws = WS_YES;
}
else {
$ws = WS_NO;
}
}
else {
# we're considering the right of an opening brace
# tightness = 0 means always pad inside with space
# tightness = 1 means pad inside if "complex"
# tightness = 2 means never pad inside with space
my $tightness;
if ( $last_block_type && $last_token eq '{' ) {
$tightness = $rOpts_block_brace_tightness;
}
else { $tightness = $tightness{$last_token} }
#=============================================================
# Patch for test problem <<snippets/fabrice_bug.in>>
# We must always avoid spaces around a bare word beginning
# with ^ as in:
# my $before = ${^PREMATCH};
# Because all of the following cause an error in perl:
# my $before = ${ ^PREMATCH };
# my $before = ${ ^PREMATCH};
# my $before = ${^PREMATCH };
# So if brace tightness flag is -bt=0 we must temporarily reset
# to bt=1. Note that here we must set tightness=1 and not 2 so
# that the closing space is also avoided
# (via the $j_tight_closing_paren flag in coding)
if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
#=============================================================
if ( $tightness <= 0 ) {
$ws = WS_YES;
}
elsif ( $tightness > 1 ) {
$ws = WS_NO;
}
else {
# find the index of the closing token
my $j_closing =
$self->[_K_closing_container_]->{$last_seqno};
# If the closing token is less than five characters ahead
# we must take a closer look
if ( defined($j_closing)
&& $j_closing - $j < 5
&& $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq
$last_seqno )
{
$ws =
ws_in_container( $j, $j_closing, $rLL, $type, $token,
$last_token );
if ( $ws == WS_NO ) {
$j_tight_closing_paren = $j_closing;
}
}
else {
$ws = WS_YES;
}
}
}
# check for special cases which override the above rules
if ( %opening_container_inside_ws && $last_seqno ) {
my $ws_override = $opening_container_inside_ws{$last_seqno};
if ($ws_override) { $ws = $ws_override }
}
$ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
if DEBUG_WHITE;
} ## end setting space flag inside opening tokens
#---------------------------------------------------------------
# Whitespace Rules Section 2:
# Special checks for certain types ...
#---------------------------------------------------------------
# The hash '%is_special_ws_type' significantly speeds up this routine,
# but be sure to update it if a new check is added.
# Currently has types: qw(k w C m - Q #)
if ( $is_special_ws_type{$type} ) {
if ( $type eq 'k' ) {
# Keywords 'for', 'foreach' are special cases for -kpit since
# the opening paren does not always immediately follow the
# keyword. So we have to search forward for the paren in this
# case. I have limited the search to 10 tokens ahead, just in
# case somebody has a big file and no opening paren. This
# should be enough for all normal code. Added the level check
# to fix b1236.
if ( $is_for_foreach{$token}
&& %keyword_paren_inner_tightness
&& defined( $keyword_paren_inner_tightness{$token} )
&& $j < $jmax )
{
my $level = $rLL->[$j]->[_LEVEL_];
my $jp = $j;
## NOTE: we might use the KNEXT variable to avoid this loop
## but profiling shows that little would be saved
foreach my $inc ( 1 .. 9 ) {
$jp++;
last if ( $jp > $jmax );
last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
set_container_ws_by_keyword( $token, $seqno_p );
last;
}
}
}
# handle a comment
elsif ( $type eq '#' ) {
# newline before block comment ($j==0), and
# space before side comment ($j>0), so ..
$ws = WS_YES;
#---------------------------------
# Nothing more to do for a comment
#---------------------------------
$rwhitespace_flags->[$j] = $ws;
next;
}
# space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
# allow a space between a backslash and single or double quote
# to avoid fooling html formatters
elsif ( $type eq 'Q' ) {
if ( $last_type eq '\\' && $token =~ /^[\"\']/ ) {
$ws =
!$rOpts_space_backslash_quote ? WS_NO
: $rOpts_space_backslash_quote == 1 ? WS_OPTIONAL
: $rOpts_space_backslash_quote == 2 ? WS_YES
: WS_YES;
}
}
# retain any space between '-' and bare word
elsif ( $type eq 'w' || $type eq 'C' ) {
$ws = WS_OPTIONAL if $last_type eq '-';
}
# retain any space between '-' and bare word; for example
# avoid space between 'USER' and '-' here: <<snippets/space2.in>>
# $myhash{USER-NAME}='steve';
elsif ( $type eq 'm' || $type eq '-' ) {
$ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
else {
# A type $type was entered in %is_special_ws_type but
# there is no code block to handle it. Either remove it
# from the hash or add a code block to handle it.
DEVEL_MODE && Fault("no code to handle type $type\n");
}
} ## end elsif ( $is_special_ws_type{$type} ...
#---------------------------------------------------------------
# Whitespace Rules Section 3:
# Handle space on inside of closing brace pairs.
#---------------------------------------------------------------
# /[\}\)\]R]/
elsif ( $is_closing_type{$type} ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
if ( $j == $j_tight_closing_paren ) {
$j_tight_closing_paren = -1;
$ws = WS_NO;
}
else {
if ( !defined($ws) ) {
my $tightness;
my $block_type = $rblock_type_of_seqno->{$seqno}
|| $block_type_for_tightness{$seqno};
if ( $block_type && $token eq '}' ) {
$tightness = $rOpts_block_brace_tightness;
}
else { $tightness = $tightness{$token} }
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
}
}
# check for special cases which override the above rules
if ( %closing_container_inside_ws && $seqno ) {
my $ws_override = $closing_container_inside_ws{$seqno};
if ($ws_override) { $ws = $ws_override }
}
$ws_4 = $ws_3 = $ws_2 = $ws
if DEBUG_WHITE;
} ## end setting space flag inside closing tokens
#---------------------------------------------------------------
# Whitespace Rules Section 4:
#---------------------------------------------------------------
# /^[L\{\(\[]$/
elsif ( $is_opening_type{$type} ) {
$last_type_is_opening = 1;
if ( $token eq '(' ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
# This will have to be tweaked as tokenization changes.
# We usually want a space at '} (', for example:
# <<snippets/space1.in>>
# map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
#
# But not others:
# &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
# At present, the above & block is marked as type L/R so this
# case won't go through here.
if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
# NOTE: some older versions of Perl had occasional problems if
# spaces are introduced between keywords or functions and
# opening parens. So the default is not to do this except is
# certain cases. The current Perl seems to tolerate spaces.
# Space between keyword and '('
elsif ( $last_type eq 'k' ) {
$ws = WS_NO
unless ( $rOpts_space_keyword_paren
|| $space_after_keyword{$last_token} );
# Set inside space flag if requested
set_container_ws_by_keyword( $last_token, $seqno );
}
# Space between function and '('
# -----------------------------------------------------
# 'w' and 'i' checks for something like:
# myfun( &myfun( ->myfun(
# -----------------------------------------------------
# Note that at this point an identifier may still have a
# leading arrow, but the arrow will be split off during token
# respacing. After that, the token may become a bare word
# without leading arrow. The point is, it is best to mark
# function call parens right here before that happens.
# Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
# NOTE: this would be the place to allow spaces between
# repeated parens, like () () (), as in case c017, but I
# decided that would not be a good idea.
# Updated to allow detached '->' from tokenizer (issue c140)
elsif (
# /^[wCUG]$/
$is_wCUG{$last_type}
|| (
# /^[wi]$/
$is_wi{$last_type}
&& (
# with prefix '->' or '&'
$last_token =~ /^([\&]|->)/
# or preceding token '->' (see b1337; c140)
|| $rtokh_last_last->[_TYPE_] eq '->'
# or preceding sub call operator token '&'
|| ( $rtokh_last_last->[_TYPE_] eq 't'
&& $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
)
)
)
{
$ws =
$rOpts_space_function_paren
? $self->ws_space_function_paren( $j, $rtokh_last_last )
: WS_NO;
set_container_ws_by_keyword( $last_token, $seqno );
$ris_function_call_paren->{$seqno} = 1;
}
# space between something like $i and ( in 'snippets/space2.in'
# for $i ( 0 .. 20 ) {
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
$ws = WS_YES;
}
# allow constant function followed by '()' to retain no space
elsif ($last_type eq 'C'
&& $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
{
$ws = WS_NO;
}
else {
# ok - opening paren not covered by a special rule
}
}
# patch for SWITCH/CASE: make space at ']{' optional
# since the '{' might begin a case or when block
elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
$ws = WS_OPTIONAL;
}
else {
# ok - opening type not covered by a special rule
}
# keep space between 'sub' and '{' for anonymous sub definition,
# be sure type = 'k' (added for c140)
if ( $type eq '{' ) {
if ( $last_token eq 'sub' && $last_type eq 'k' ) {
$ws = WS_YES;
}
# this is needed to avoid no space in '){'
if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
# avoid any space before the brace or bracket in something like
# @opts{'a','b',...}
if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
$ws = WS_NO;
}
}
# The --extended-block-tightness option allows certain braces
# to be treated as blocks just for setting inner whitespace
if ( $rOpts_extended_block_tightness && $token eq '{' ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
if ( !$rblock_type_of_seqno->{$seqno}
&& $extended_block_tightness_list{$last_token} )
{
# Ok - make this brace a block type for tightness only
$block_type_for_tightness{$seqno} = $last_token;
}
}
} ## end elsif ( $is_opening_type{$type} ) {
else {
# ok: $type not opening, closing, or covered by a special rule
}
# always preserve whatever space was used after a possible
# filehandle (except _) or here doc operator
if (
(
( $last_type eq 'Z' && $last_token ne '_' )
|| $last_type eq 'h'
)
&& $type ne '#' # no longer required due to early exit for '#' above
)
{
# no space for '$ {' even if '$' is marked as type 'Z', issue c221
if ( $last_type eq 'Z' && $last_token eq '$' && $token eq '{' ) {
$ws = WS_NO;
}
else {
$ws = WS_OPTIONAL;
}
}
$ws_4 = $ws_3 = $ws
if DEBUG_WHITE;
if ( !defined($ws) ) {
#---------------------------------------------------------------
# Whitespace Rules Section 4:
# Use the binary rule table.
#---------------------------------------------------------------
if ( defined( $binary_ws_rules{$last_type}{$type} ) ) {
$ws = $binary_ws_rules{$last_type}{$type};
$ws_4 = $ws if DEBUG_WHITE;
}
#---------------------------------------------------------------
# Whitespace Rules Section 5:
# Apply default rules not covered above.
#---------------------------------------------------------------
# If we fall through to here, look at the pre-defined hash tables
# for the two tokens, and:
# if (they are equal) use the common value
# if (either is zero or undef) use the other
# if (either is -1) use it
# That is,
# left vs right
# 1 vs 1 --> 1
# 0 vs 0 --> 0
# -1 vs -1 --> -1
#
# 0 vs -1 --> -1
# 0 vs 1 --> 1
# 1 vs 0 --> 1
# -1 vs 0 --> -1
#
# -1 vs 1 --> -1
# 1 vs -1 --> -1
else {
my $wl = $want_left_space{$type};
my $wr = $want_right_space{$last_type};
if ( !defined($wl) ) {
$ws = defined($wr) ? $wr : 0;
}
elsif ( !defined($wr) ) {
$ws = $wl;
}
else {
$ws =
( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
}
}
}
# Treat newline as a whitespace. Otherwise, we might combine
# 'Send' and '-recipients' here according to the above rules:
# <<snippets/space3.in>>
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
if ( !$ws
&& $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
{
$ws = WS_YES;
}
$rwhitespace_flags->[$j] = $ws;
# remember non-blank, non-comment tokens
$last_token = $token;
$last_type = $type;
$rtokh_last_last = $rtokh_last;
$rtokh_last = $rtokh;
# Programming note: for some reason, it is very much faster to 'next'
# out of this loop here than to put the DEBUG coding in a block.
# But note that the debug code must then update its own copies
# of $last_token and $last_type.
next if ( !DEBUG_WHITE );
my $str = substr( $last_token_dbg, 0, 15 );
$str .= SPACE x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( !defined($ws_2) ) { $ws_2 = "*" }
if ( !defined($ws_3) ) { $ws_3 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
print {*STDOUT}
"NEW WHITE: i=$j $str $last_type_dbg $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
# reset for next pass
$ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
$last_token_dbg = $token;
$last_type_dbg = $type;
} ## end main loop
if ( $rOpts->{'tight-secret-operators'} ) {
new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
}
$self->[_ris_function_call_paren_] = $ris_function_call_paren;
return $rwhitespace_flags;
} ## end sub set_whitespace_flags
sub set_container_ws_by_keyword {
my ( $word, $sequence_number ) = @_;
return unless (%keyword_paren_inner_tightness);
# We just saw a keyword (or other function name) followed by an opening
# paren. Now check to see if the following paren should have special
# treatment for its inside space. If so we set a hash value using the
# sequence number as key.
if ( $word && $sequence_number ) {
my $tightness = $keyword_paren_inner_tightness{$word};
if ( defined($tightness) && $tightness != 1 ) {
my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
$opening_container_inside_ws{$sequence_number} = $ws_flag;
$closing_container_inside_ws{$sequence_number} = $ws_flag;
}
}
return;
} ## end sub set_container_ws_by_keyword
sub ws_in_container {
my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_;
# Given:
# $j = index of token following an opening container token
# $type, $token = the type and token at index $j
# $j_closing = closing token of the container
# $last_token = the opening token of the container
# Return:
# WS_NO if there is just one token in the container (with exceptions)
# WS_YES otherwise
#------------------------------------
# Look forward for the closing token;
#------------------------------------
if ( $j + 1 > $j_closing ) { return WS_NO }
# Patch to count '-foo' as single token so that
# each of $a{-foo} and $a{foo} and $a{'foo'} do
# not get spaces with default formatting.
my $j_here = $j;
++$j_here
if ( $token eq '-'
&& $last_token eq '{'
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
# Patch to count a sign separated from a number as a single token, as
# in the following line. Otherwise, it takes two steps to converge:
# deg2rad(- 0.5)
if ( ( $type eq 'm' || $type eq 'p' )
&& $j < $j_closing + 1
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
&& $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
&& $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
{
$j_here = $j + 2;
}
# $j_next is where a closing token should be if the container has
# just a "single" token
if ( $j_here + 1 > $j_closing ) { return WS_NO }
my $j_next =
( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
? $j_here + 2
: $j_here + 1;
#-----------------------------------------------------------------
# Now decide: if we get to the closing token we will keep it tight
#-----------------------------------------------------------------
if (
$j_next == $j_closing
# OLD PROBLEM: but watch out for this: [ [ ] (misc.t)
# No longer necessary because of the previous check on sequence numbers
##&& $last_token ne $token
# double diamond is usually spaced
&& $token ne '<<>>'
)
{
return WS_NO;
}
return WS_YES;
} ## end sub ws_in_container
sub ws_space_function_paren {
my ( $self, $j, $rtokh_last_last ) = @_;
# Called if --space-function-paren is set to see if it might cause
# a problem. The manual warns the user about potential problems with
# this flag. Here we just try to catch one common problem.
# Given:
# $j = index of '(' after function name
# Return:
# WS_NO if no space
# WS_YES otherwise
# This was added to fix for issue c166. Ignore -sfp at a possible indirect
# object location. For example, do not convert this:
# print header() ...
# to this:
# print header () ...
# because in this latter form, header may be taken to be a file handle
# instead of a function call.
# Start with the normal value for -sfp:
my $ws = WS_YES;
# now check to be sure we don't cause a problem:
my $type_ll = $rtokh_last_last->[_TYPE_];
my $tok_ll = $rtokh_last_last->[_TOKEN_];
# NOTE: this is just a minimal check. For example, we might also check
# for something like this:
# print ( header ( ..
if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) {
$ws = WS_NO;
}
return $ws;
} ## end sub ws_space_function_paren
} ## end closure set_whitespace_flags
sub dump_want_left_space {
my $fh = shift;
local $LIST_SEPARATOR = "\n";
$fh->print(<<EOM);
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
foreach my $key ( sort keys %want_left_space ) {
$fh->print("$key\t$want_left_space{$key}\n");
}
return;
} ## end sub dump_want_left_space
sub dump_want_right_space {
my $fh = shift;
local $LIST_SEPARATOR = "\n";
$fh->print(<<EOM);
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
foreach my $key ( sort keys %want_right_space ) {
$fh->print("$key\t$want_right_space{$key}\n");
}
return;
} ## end sub dump_want_right_space
{ ## begin closure is_essential_whitespace
my %is_sort_grep_map;
my %is_for_foreach;
my %is_digraph;
my %is_trigraph;
my %essential_whitespace_filter_l1;
my %essential_whitespace_filter_r1;
my %essential_whitespace_filter_l2;
my %essential_whitespace_filter_r2;
my %is_type_with_space_before_bareword;
my %is_special_variable_char;
BEGIN {
my @q;
# NOTE: This hash is like the global %is_sort_map_grep, but it ignores
# grep aliases on purpose, since here we are looking parens, not braces
@q = qw(sort grep map);
@is_sort_grep_map{@q} = (1) x scalar(@q);
@q = qw(for foreach);
@is_for_foreach{@q} = (1) x scalar(@q);
@q = qw(
.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
);
@is_digraph{@q} = (1) x scalar(@q);
@q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
@is_trigraph{@q} = (1) x scalar(@q);
# These are used as a speedup filters for sub is_essential_whitespace.
# Filter 1:
# These left side token types USUALLY do not require a space:
@q = qw( ; { } [ ] L R );
push @q, ',';
push @q, ')';
push @q, '(';
@essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
# BUT some might if followed by these right token types
@q = qw( pp mm << <<= h );
@essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
# Filter 2:
# These right side filters usually do not require a space
@q = qw( ; ] R } );
push @q, ',';
push @q, ')';
@essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
# BUT some might if followed by these left token types
@q = qw( h Z );
@essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
# Keep a space between certain types and any bareword:
# Q: keep a space between a quote and a bareword to prevent the
# bareword from becoming a quote modifier.
# &: do not remove space between an '&' and a bare word because
# it may turn into a function evaluation, like here
# between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
# $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
@q = qw( Q & );
@is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
# These are the only characters which can (currently) form special
# variables, like $^W: (issue c066, c068).
@q =
qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
@{is_special_variable_char}{@q} = (1) x scalar(@q);
} ## end BEGIN
sub is_essential_whitespace {
# Essential whitespace means whitespace which cannot be safely deleted
# without risking the introduction of a syntax error.
# We are given three tokens and their types:
# ($tokenl, $typel) is the token to the left of the space in question
# ($tokenr, $typer) is the token to the right of the space in question
# ($tokenll, $typell) is previous nonblank token to the left of $tokenl
#
# Note1: This routine should almost never need to be changed. It is
# for avoiding syntax problems rather than for formatting.
# Note2: The -mangle option causes large numbers of calls to this
# routine and therefore is a good test. So if a change is made, be sure
# to use nytprof to profile with both old and revised coding using the
# -mangle option and check differences.
my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
# This is potentially a very slow routine but the following quick
# filters typically catch and handle over 90% of the calls.
# Filter 1: usually no space required after common types ; , [ ] { } ( )
return
if ( $essential_whitespace_filter_l1{$typel}
&& !$essential_whitespace_filter_r1{$typer} );
# Filter 2: usually no space before common types ; ,
return
if ( $essential_whitespace_filter_r2{$typer}
&& !$essential_whitespace_filter_l2{$typel} );
# Filter 3: Handle side comments: a space is only essential if the left
# token ends in '$' For example, we do not want to create $#foo below:
# sub t086
# ( #foo)))
# $ #foo)))
# a #foo)))
# ) #foo)))
# { ... }
# Also, I prefer not to put a ? and # together because ? used to be
# a pattern delimiter and spacing was used if guessing was needed.
if ( $typer eq '#' ) {
return 1
if ( $tokenl
&& ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
return;
}
my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
my $tokenr_is_open_paren = $tokenr eq '(';
my $token_joined = $tokenl . $tokenr;
my $tokenl_is_dash = $tokenl eq '-';
my $result =
# never combine two bare words or numbers
# examples: and ::ok(1)
# return ::spw(...)
# for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
# $input eq"quit" to make $inputeq"quit"
# my $size=-s::SINK if $file; <==OK but we won't do it
# don't join something like: for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
&& ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
# do not combine a number with a concatenation dot
# example: pom.caputo:
# $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
|| $typel eq 'n' && $tokenr eq '.'
|| $typer eq 'n' && $tokenl eq '.'
# cases of a space before a bareword...
|| (
$tokenr_is_bareword && (
# do not join a minus with a bare word, because you might form
# a file test operator. Example from Complex.pm:
# if (CORE::abs($z - i) < $eps);
# "z-i" would be taken as a file test.
$tokenl_is_dash && length($tokenr) == 1
# and something like this could become ambiguous without space
# after the '-':
# use constant III=>1;
# $a = $b - III;
# and even this:
# $a = - III;
|| $tokenl_is_dash && $typer =~ /^[wC]$/
# keep space between types Q & and a bareword
|| $is_type_with_space_before_bareword{$typel}
# +-: binary plus and minus before a bareword could get
# converted into unary plus and minus on next pass through the
# tokenizer. This can lead to blinkers: cases b660 b670 b780
# b781 b787 b788 b790 So we keep a space unless the +/- clearly
# follows an operator
|| ( ( $typel eq '+' || $typel eq '-' )
&& $typell !~ /^[niC\)\}\]R]$/ )
# keep a space between a token ending in '$' and any word;
# this caused trouble: "die @$ if $@"
|| $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
# don't combine $$ or $# with any alphanumeric
# (testfile mangle.t with --mangle)
|| $tokenl eq '$$'
|| $tokenl eq '$#'
)
) ## end $tokenr_is_bareword
# OLD, not used
# '= -' should not become =- or you will get a warning
# about reversed -=
# || ($tokenr eq '-')
# do not join a bare word with a minus, like between 'Send' and
# '-recipients' here <<snippets/space3.in>>
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
# This is the safest thing to do. If we had the token to the right of
# the minus we could do a better check.
#
# And do not combine a bareword and a quote, like this:
# oops "Your login, $Bad_Login, is not valid";
# It can cause a syntax error if oops is a sub
|| $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
# perl is very fussy about spaces before <<
|| substr( $tokenr, 0, 2 ) eq '<<'
# avoid combining tokens to create new meanings. Example:
# $a+ +$b must not become $a++$b
|| ( $is_digraph{$token_joined} )
|| $is_trigraph{$token_joined}
# another example: do not combine these two &'s:
# allow_options & &OPT_EXECCGI
|| $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
# retain any space after possible filehandle
# (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
# but no space for '$ {' even if '$' is marked as type 'Z', issue c221
|| ( $typel eq 'Z' && !( $tokenl eq '$' && $tokenr eq '{' ) )
# Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
# space after type Y. Otherwise, it will get parsed as type 'Z' later
# and any space would have to be added back manually if desired.
|| $typel eq 'Y'
# Perl is sensitive to whitespace after the + here:
# $b = xvals $a + 0.1 * yvals $a;
|| $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
|| (
$tokenr_is_open_paren && (
# keep paren separate in 'use Foo::Bar ()'
( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
# OLD: keep any space between filehandle and paren:
# file mangle.t with --mangle:
# NEW: this test is no longer necessary here (moved above)
## || $typel eq 'Y'
# must have space between grep and left paren; "grep(" will fail
|| $is_sort_grep_map{$tokenl}
# don't stick numbers next to left parens, as in:
#use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
|| $typel eq 'n'
)
) ## end $tokenr_is_open_paren
# retain any space after here doc operator ( hereerr.t)
|| $typel eq 'h'
# be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
|| ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
|| ( $typel eq '++' || $typel eq '--' )
&& $tokenr !~ /^[\;\}\)\]]/
# need space after foreach my; for example, this will fail in
# older versions of Perl:
# foreach my$ft(@filetypes)...
|| (
$tokenl eq 'my'
&& substr( $tokenr, 0, 1 ) eq '$'
# /^(for|foreach)$/
&& $is_for_foreach{$tokenll}
)
# Keep space after like $^ if needed to avoid forming a different
# special variable (issue c068). For example:
# my $aa = $^ ? "none" : "ok";
|| ( $typel eq 'i'
&& length($tokenl) == 2
&& substr( $tokenl, 1, 1 ) eq '^'
&& $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
# We must be sure that a space between a ? and a quoted string
# remains if the space before the ? remains. [Loca.pm, lockarea]
# ie,
# $b=join $comma ? ',' : ':', @_; # ok
# $b=join $comma?',' : ':', @_; # ok!
# $b=join $comma ?',' : ':', @_; # error!
# Not really required:
## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
# Space stacked labels...
# Not really required: Perl seems to accept non-spaced labels.
## || $typel eq 'J' && $typer eq 'J'
; # the value of this long logic sequence is the result we want
return $result;
} ## end sub is_essential_whitespace
} ## end closure is_essential_whitespace
{ ## begin closure new_secret_operator_whitespace
my %secret_operators;
my %is_leading_secret_token;
BEGIN {
# token lists for perl secret operators as compiled by Philippe Bruhat
# at: https://metacpan.org/module/perlsecret
%secret_operators = (
'Goatse' => [qw#= ( ) =#], #=( )=
'Venus1' => [qw#0 +#], # 0+
'Venus2' => [qw#+ 0#], # +0
'Enterprise' => [qw#) x ! !#], # ()x!!
'Kite1' => [qw#~ ~ <>#], # ~~<>
'Kite2' => [qw#~~ <>#], # ~~<>
'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
'Bang bang ' => [qw#! !#], # !!
);
# The following operators and constants are not included because they
# are normally kept tight by perltidy:
# ~~ <~>
#
# Make a lookup table indexed by the first token of each operator:
# first token => [list, list, ...]
foreach my $value ( values(%secret_operators) ) {
my $tok = $value->[0];
push @{ $is_leading_secret_token{$tok} }, $value;
}
} ## end BEGIN
sub new_secret_operator_whitespace {
my ( $rlong_array, $rwhitespace_flags ) = @_;
# Loop over all tokens in this line
my ( $token, $type );
my $jmax = @{$rlong_array} - 1;
foreach my $j ( 0 .. $jmax ) {
$token = $rlong_array->[$j]->[_TOKEN_];
$type = $rlong_array->[$j]->[_TYPE_];
# Skip unless this token might start a secret operator
next if ( $type eq 'b' );
next unless ( $is_leading_secret_token{$token} );
# Loop over all secret operators with this leading token
foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
my $jend = $j - 1;
foreach my $tok ( @{$rpattern} ) {
$jend++;
$jend++
if ( $jend <= $jmax
&& $rlong_array->[$jend]->[_TYPE_] eq 'b' );
if ( $jend > $jmax
|| $tok ne $rlong_array->[$jend]->[_TOKEN_] )
{
$jend = undef;
last;
}
}
if ($jend) {
# set flags to prevent spaces within this operator
foreach my $jj ( $j + 1 .. $jend ) {
$rwhitespace_flags->[$jj] = WS_NO;
}
$j = $jend;
last;
}
} ## End Loop over all operators
} ## End loop over all tokens
return;
} ## end sub new_secret_operator_whitespace
} ## end closure new_secret_operator_whitespace
{ ## begin closure set_bond_strengths
# These routines and variables are involved in deciding where to break very
# long lines.
# NEW_TOKENS must add bond strength rules
my %is_good_keyword_breakpoint;
my %is_container_token;
my %binary_bond_strength_nospace;
my %binary_bond_strength;
my %nobreak_lhs;
my %nobreak_rhs;
my @bias_tokens;
my %bias_hash;
my %bias;
my $delta_bias;
sub initialize_bond_strength_hashes {
my @q;
@q = qw(if unless while until for foreach);
@is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
@q = qw/ ( [ { } ] ) /;
@is_container_token{@q} = (1) x scalar(@q);
# The decision about where to break a line depends upon a "bond
# strength" between tokens. The LOWER the bond strength, the MORE
# likely a break. A bond strength may be any value but to simplify
# things there are several pre-defined strength levels:
# NO_BREAK => 10000;
# VERY_STRONG => 100;
# STRONG => 2.1;
# NOMINAL => 1.1;
# WEAK => 0.8;
# VERY_WEAK => 0.55;
# The strength values are based on trial-and-error, and need to be
# tweaked occasionally to get desired results. Some comments:
#
# 1. Only relative strengths are important. small differences
# in strengths can make big formatting differences.
# 2. Each indentation level adds one unit of bond strength.
# 3. A value of NO_BREAK makes an unbreakable bond
# 4. A value of VERY_WEAK is the strength of a ','
# 5. Values below NOMINAL are considered ok break points.
# 6. Values above NOMINAL are considered poor break points.
#
# The bond strengths should roughly follow precedence order where
# possible. If you make changes, please check the results very
# carefully on a variety of scripts. Testing with the -extrude
# options is particularly helpful in exercising all of the rules.
# Wherever possible, bond strengths are defined in the following
# tables. There are two main stages to setting bond strengths and
# two types of tables:
#
# The first stage involves looking at each token individually and
# defining left and right bond strengths, according to if we want
# to break to the left or right side, and how good a break point it
# is. For example tokens like =, ||, && make good break points and
# will have low strengths, but one might want to break on either
# side to put them at the end of one line or beginning of the next.
#
# The second stage involves looking at certain pairs of tokens and
# defining a bond strength for that particular pair. This second
# stage has priority.
#---------------------------------------------------------------
# Bond Strength BEGIN Section 1.
# Set left and right bond strengths of individual tokens.
#---------------------------------------------------------------
# NOTE: NO_BREAK's set in this section first are HINTS which will
# probably not be honored. Essential NO_BREAKS's should be set in
# BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
# of this subroutine.
# Note that we are setting defaults in this section. The user
# cannot change bond strengths but can cause the left and right
# bond strengths of any token type to be swapped through the use of
# the -wba and -wbb flags. In this way the user can determine if a
# breakpoint token should appear at the end of one line or the
# beginning of the next line.
%right_bond_strength = ();
%left_bond_strength = ();
%binary_bond_strength_nospace = ();
%binary_bond_strength = ();
%nobreak_lhs = ();
%nobreak_rhs = ();
# The hash keys in this section are token types, plus the text of
# certain keywords like 'or', 'and'.
# no break around possible filehandle
$left_bond_strength{'Z'} = NO_BREAK;
$right_bond_strength{'Z'} = NO_BREAK;
# never put a bare word on a new line:
# example print (STDERR, "bla"); will fail with break after (
$left_bond_strength{'w'} = NO_BREAK;
# blanks always have infinite strength to force breaks after
# real tokens
$right_bond_strength{'b'} = NO_BREAK;
# try not to break on exponentiation
@q = qw# ** .. ... <=> #;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (STRONG) x scalar(@q);
# The comma-arrow has very low precedence but not a good break point
$left_bond_strength{'=>'} = NO_BREAK;
$right_bond_strength{'=>'} = NOMINAL;
# ok to break after label
$left_bond_strength{'J'} = NO_BREAK;
$right_bond_strength{'J'} = NOMINAL;
$left_bond_strength{'j'} = STRONG;
$right_bond_strength{'j'} = STRONG;
$left_bond_strength{'A'} = STRONG;
$right_bond_strength{'A'} = STRONG;
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
$left_bond_strength{'CORE::'} = NOMINAL;
$right_bond_strength{'CORE::'} = NO_BREAK;
# Fix for c250: added strengths for new type 'P'
# Note: these are working okay, but may eventually need to be
# adjusted or even removed.
$left_bond_strength{'P'} = NOMINAL;
$right_bond_strength{'P'} = NOMINAL;
# breaking AFTER modulus operator is ok:
@q = qw< % >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
# Break AFTER math operators * and /
@q = qw< * / x >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# Break AFTER weakest math operators + and -
# Make them weaker than * but a bit stronger than '.'
@q = qw< + - >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
# Define left strength of unary plus and minus (fixes case b511)
$left_bond_strength{p} = $left_bond_strength{'+'};
$left_bond_strength{m} = $left_bond_strength{'-'};
# And make right strength of unary plus and minus very high.
# Fixes cases b670 b790
$right_bond_strength{p} = NO_BREAK;
$right_bond_strength{m} = NO_BREAK;
# breaking BEFORE these is just ok:
@q = qw# >> << #;
@right_bond_strength{@q} = (STRONG) x scalar(@q);
@left_bond_strength{@q} = (NOMINAL) x scalar(@q);
# breaking before the string concatenation operator seems best
# because it can be hard to see at the end of a line
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
@q = qw< } ] ) R >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# make these a little weaker than nominal so that they get
# favored for end-of-line characters
@q = qw< != == =~ !~ ~~ !~~ >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
# break AFTER these
@q = qw# < > | & >= <= #;
@left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
# breaking either before or after a quote is ok
# but bias for breaking before a quote
$left_bond_strength{'Q'} = NOMINAL;
$right_bond_strength{'Q'} = NOMINAL + 0.02;
$left_bond_strength{'q'} = NOMINAL;
$right_bond_strength{'q'} = NOMINAL;
# starting a line with a keyword is usually ok
$left_bond_strength{'k'} = NOMINAL;
# we usually want to bond a keyword strongly to what immediately
# follows, rather than leaving it stranded at the end of a line
$right_bond_strength{'k'} = STRONG;
$left_bond_strength{'G'} = NOMINAL;
$right_bond_strength{'G'} = STRONG;
# assignment operators
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
# Default is to break AFTER various assignment operators
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
# Default is to break BEFORE '&&' and '||' and '//'
# set strength of '||' to same as '=' so that chains like
# $a = $b || $c || $d will break before the first '||'
$right_bond_strength{'||'} = NOMINAL;
$left_bond_strength{'||'} = $right_bond_strength{'='};
# same thing for '//'
$right_bond_strength{'//'} = NOMINAL;
$left_bond_strength{'//'} = $right_bond_strength{'='};
# set strength of && a little higher than ||
$right_bond_strength{'&&'} = NOMINAL;
$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
$left_bond_strength{';'} = VERY_STRONG;
$right_bond_strength{';'} = VERY_WEAK;
$left_bond_strength{'f'} = VERY_STRONG;
# make right strength of for ';' a little less than '='
# to make for contents break after the ';' to avoid this:
# for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
# $number_of_fields )
# and make it weaker than ',' and 'and' too
$right_bond_strength{'f'} = VERY_WEAK - 0.03;
# The strengths of ?/: should be somewhere between
# an '=' and a quote (NOMINAL),
# make strength of ':' slightly less than '?' to help
# break long chains of ? : after the colons
$left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
$right_bond_strength{':'} = NO_BREAK;
$left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
$right_bond_strength{'?'} = NO_BREAK;
$left_bond_strength{','} = VERY_STRONG;
$right_bond_strength{','} = VERY_WEAK;
# remaining digraphs and trigraphs not defined above
@q = qw( :: <> ++ --);
@left_bond_strength{@q} = (WEAK) x scalar(@q);
@right_bond_strength{@q} = (STRONG) x scalar(@q);
# Set bond strengths of certain keywords
# make 'or', 'err', 'and' slightly weaker than a ','
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
$left_bond_strength{'or'} = VERY_WEAK - 0.02;
$left_bond_strength{'err'} = VERY_WEAK - 0.02;
$left_bond_strength{'xor'} = VERY_WEAK - 0.01;
@q = qw(ne eq);
@left_bond_strength{@q} = (NOMINAL) x scalar(@q);
@q = qw(lt gt le ge);
@left_bond_strength{@q} = ( 0.9 * NOMINAL + 0.1 * STRONG ) x scalar(@q);
@q = qw(and or err xor ne eq);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
#---------------------------------------------------------------
# Bond Strength BEGIN Section 2.
# Set binary rules for bond strengths between certain token types.
#---------------------------------------------------------------
# We have a little problem making tables which apply to the
# container tokens. Here is a list of container tokens and
# their types:
#
# type tokens // meaning
# { {, [, ( // indent
# } }, ], ) // outdent
# [ [ // left non-structural [ (enclosing an array index)
# ] ] // right non-structural square bracket
# ( ( // left non-structural paren
# ) ) // right non-structural paren
# L { // left non-structural curly brace (enclosing a key)
# R } // right non-structural curly brace
#
# Some rules apply to token types and some to just the token
# itself. We solve the problem by combining type and token into a
# new hash key for the container types.
#
# If a rule applies to a token 'type' then we need to make rules
# for each of these 'type.token' combinations:
# Type Type.Token
# { {{, {[, {(
# [ [[
# ( ((
# L L{
# } }}, }], })
# ] ]]
# ) ))
# R R}
#
# If a rule applies to a token then we need to make rules for
# these 'type.token' combinations:
# Token Type.Token
# { {{, L{
# [ {[, [[
# ( {(, ((
# } }}, R}
# ] }], ]]
# ) }), ))
# allow long lines before final { in an if statement, as in:
# if (..........
# ..........)
# {
#
# Otherwise, the line before the { tends to be too short.
$binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
$binary_bond_strength{'(('}{'{{'} = NOMINAL;
# break on something like '} (', but keep this stronger than a ','
# example is in 'howe.pl'
$binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
$binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
# keep matrix and hash indices together
# but make them a little below STRONG to allow breaking open
# something like {'some-word'}{'some-very-long-word'} at the }{
# (bracebrk.t)
$binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
# increase strength to the point where a break in the following
# will be after the opening paren rather than at the arrow:
# $a->$b($c);
$binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
# Added for c140 to make 'w ->' and 'i ->' behave the same
$binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
# Note that the following alternative strength would make the break at the
# '->' rather than opening the '('. Both have advantages and disadvantages.
# $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
$binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
#---------------------------------------------------------------
# Binary NO_BREAK rules
#---------------------------------------------------------------
# use strict requires that bare word and => not be separated
$binary_bond_strength{'C'}{'=>'} = NO_BREAK;
$binary_bond_strength{'U'}{'=>'} = NO_BREAK;
# Never break between a bareword and a following paren because
# perl may give an error. For example, if a break is placed
# between 'to_filehandle' and its '(' the following line will
# give a syntax error [Carp.pm]: my( $no) =fileno(
# to_filehandle( $in)) ;
$binary_bond_strength{'C'}{'(('} = NO_BREAK;
$binary_bond_strength{'C'}{'{('} = NO_BREAK;
$binary_bond_strength{'U'}{'(('} = NO_BREAK;
$binary_bond_strength{'U'}{'{('} = NO_BREAK;
# use strict requires that bare word within braces not start new
# line
$binary_bond_strength{'L{'}{'w'} = NO_BREAK;
$binary_bond_strength{'w'}{'R}'} = NO_BREAK;
# The following two rules prevent a syntax error caused by breaking up
# a construction like '{-y}'. The '-' quotes the 'y' and prevents
# it from being taken as a transliteration. We have to keep
# token types 'L m w' together to prevent this error.
$binary_bond_strength{'L{'}{'m'} = NO_BREAK;
$binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
# keep 'bareword-' together, but only if there is no space between
# the word and dash. Do not keep together if there is a space.
# example 'use perl6-alpha'
$binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
# use strict requires that bare word and => not be separated
$binary_bond_strength{'w'}{'=>'} = NO_BREAK;
# use strict does not allow separating type info from trailing { }
# testfile is readmail.pl
$binary_bond_strength{'t'}{'L{'} = NO_BREAK;
$binary_bond_strength{'i'}{'L{'} = NO_BREAK;
# Fix for c250: set strength for new 'S' to be same as 'i'
# testfile is test11/Hub.pm
$binary_bond_strength{'S'}{'L{'} = NO_BREAK;
# As a defensive measure, do not break between a '(' and a
# filehandle. In some cases, this can cause an error. For
# example, the following program works:
# my $msg="hi!\n";
# print
# ( STDOUT
# $msg
# );
#
# But this program fails:
# my $msg="hi!\n";
# print
# (
# STDOUT
# $msg
# );
#
# This is normally only a problem with the 'extrude' option
$binary_bond_strength{'(('}{'Y'} = NO_BREAK;
$binary_bond_strength{'{('}{'Y'} = NO_BREAK;
# never break between sub name and opening paren
$binary_bond_strength{'w'}{'(('} = NO_BREAK;
$binary_bond_strength{'w'}{'{('} = NO_BREAK;
# keep '}' together with ';'
$binary_bond_strength{'}}'}{';'} = NO_BREAK;
# Breaking before a ++ can cause perl to guess wrong. For
# example the following line will cause a syntax error
# with -extrude if we break between '$i' and '++' [fixstyle2]
# print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
$nobreak_lhs{'++'} = NO_BREAK;
# Do not break before a possible file handle
$nobreak_lhs{'Z'} = NO_BREAK;
# use strict hates bare words on any new line. For
# example, a break before the underscore here provokes the
# wrath of use strict:
# if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
$nobreak_rhs{'F'} = NO_BREAK;
$nobreak_rhs{'CORE::'} = NO_BREAK;
# To prevent the tokenizer from switching between types 'w' and 'G' we
# need to avoid breaking between type 'G' and the following code block
# brace. Fixes case b929.
$nobreak_rhs{G} = NO_BREAK;
#---------------------------------------------------------------
# Bond Strength BEGIN Section 3.
# Define tables and values for applying a small bias to the above
# values.
#---------------------------------------------------------------
# Adding a small 'bias' to strengths is a simple way to make a line
# break at the first of a sequence of identical terms. For
# example, to force long string of conditional operators to break
# with each line ending in a ':', we can add a small number to the
# bond strength of each ':' (colon.t)
@bias_tokens = qw( : && || f and or . ); # tokens which get bias
%bias_hash = map { $_ => 0 } @bias_tokens;
$delta_bias = 0.0001; # a very small strength level
return;
} ## end sub initialize_bond_strength_hashes
use constant DEBUG_BOND => 0;
sub set_bond_strengths {
my ($self) = @_;
#-----------------------------------------------------------------
# Define a 'bond strength' for each token pair in an output batch.
# See comments above for definition of bond strength.
#-----------------------------------------------------------------
my $rbond_strength_to_go = [];
my $rLL = $self->[_rLL_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rK_weld_left = $self->[_rK_weld_left_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
# patch-its always ok to break at end of line
$nobreak_to_go[$max_index_to_go] = 0;
# we start a new set of bias values for each line
%bias = %bias_hash;
my $code_bias = -.01; # bias for closing block braces
my $type = 'b';
my $token = SPACE;
my $token_length = 1;
my $last_type;
my $last_nonblank_type = $type;
my $last_nonblank_token = $token;
my $list_str = $left_bond_strength{'?'};
my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
$next_nonblank_type, $next_token, $next_type,
$total_nesting_depth, );
# main loop to compute bond strengths between each pair of tokens
foreach my $i ( 0 .. $max_index_to_go ) {
$last_type = $type;
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
$type = $types_to_go[$i];
# strength on both sides of a blank is the same
if ( $type eq 'b' && $last_type ne 'b' ) {
$rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
$nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
next;
}
$token = $tokens_to_go[$i];
$token_length = $token_lengths_to_go[$i];
$block_type = $block_type_to_go[$i];
$i_next = $i + 1;
$next_type = $types_to_go[$i_next];
$next_token = $tokens_to_go[$i_next];
$total_nesting_depth = $nesting_depth_to_go[$i_next];
$i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $seqno = $type_sequence_to_go[$i];
my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
# We are computing the strength of the bond between the current
# token and the NEXT token.
#---------------------------------------------------------------
# Bond Strength Section 1:
# First Approximation.
# Use minimum of individual left and right tabulated bond
# strengths.
#---------------------------------------------------------------
my $bsr = $right_bond_strength{$type};
my $bsl = $left_bond_strength{$next_nonblank_type};
# define right bond strengths of certain keywords
if ( $type eq 'k' ) {
if ( defined( $right_bond_strength{$token} ) ) {
$bsr = $right_bond_strength{$token};
}
}
# set terminal bond strength to the nominal value
# this will cause good preceding breaks to be retained
if ( $i_next_nonblank > $max_index_to_go ) {
$bsl = NOMINAL;
# But weaken the bond at a 'missing terminal comma'. If an
# optional comma is missing at the end of a broken list, use
# the strength of a comma anyway to make formatting the same as
# if it were there. Fixes issue c133.
if ( !defined($bsr) || $bsr > VERY_WEAK ) {
my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
if ( $ris_list_by_seqno->{$seqno_px} ) {
my $KK = $K_to_go[$max_index_to_go];
my $Kn = $self->K_next_nonblank($KK);
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
if ( $seqno_n && $seqno_n eq $seqno_px ) {
$bsl = VERY_WEAK;
}
}
}
}
# define left bond strengths of certain keywords
if ( $next_nonblank_type eq 'k' ) {
if ( defined( $left_bond_strength{$next_nonblank_token} ) ) {
$bsl = $left_bond_strength{$next_nonblank_token};
}
}
# Use the minimum of the left and right strengths. Note: it might
# seem that we would want to keep a NO_BREAK if either token has
# this value. This didn't work, for example because in an arrow
# list, it prevents the comma from separating from the following
# bare word (which is probably quoted by its arrow). So necessary
# NO_BREAK's have to be handled as special cases in the final
# section.
if ( !defined($bsr) ) { $bsr = VERY_STRONG }
if ( !defined($bsl) ) { $bsl = VERY_STRONG }
my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
$bond_str_1 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# Bond Strength Section 2:
# Apply hardwired rules..
#---------------------------------------------------------------
# Patch to put terminal or clauses on a new line: Weaken the bond
# at an || followed by die or similar keyword to make the terminal
# or clause fall on a new line, like this:
#
# my $class = shift
# || die "Cannot add broadcast: No class identifier found";
#
# Otherwise the break will be at the previous '=' since the || and
# = have the same starting strength and the or is biased, like
# this:
#
# my $class =
# shift || die "Cannot add broadcast: No class identifier found";
#
# In any case if the user places a break at either the = or the ||
# it should remain there.
if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
# /^(die|confess|croak|warn)$/
if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
if ( $want_break_before{$token} && $i > 0 ) {
$rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
# keep bond strength of a token and its following blank
# the same
if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
$rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
}
}
else {
$bond_str -= $delta_bias;
}
}
}
# good to break after end of code blocks
if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
$code_bias += $delta_bias;
}
if ( $type eq 'k' ) {
# allow certain control keywords to stand out
if ( $next_nonblank_type eq 'k'
&& $is_last_next_redo_return{$token} )
{
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
# Don't break after keyword my. This is a quick fix for a
# rare problem with perl. An example is this line from file
# Container.pm:
# foreach my $question( Debian::DebConf::ConfigDb::gettree(
# $this->{'question'} ) )
if ( $token eq 'my' ) {
$bond_str = NO_BREAK;
}
}
if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
if ( $is_keyword_returning_list{$next_nonblank_token} ) {
$bond_str = $list_str if ( $bond_str > $list_str );
}
# keywords like 'unless', 'if', etc, within statements
# make good breaks
if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
$bond_str = VERY_WEAK / 1.05;
}
}
# try not to break before a comma-arrow
elsif ( $next_nonblank_type eq '=>' ) {
if ( $bond_str < STRONG ) { $bond_str = STRONG }
}
else {
## ok - not special
}
#---------------------------------------------------------------
# Additional hardwired NOBREAK rules
#---------------------------------------------------------------
# map1.t -- correct for a quirk in perl
if ( $token eq '('
&& $next_nonblank_type eq 'i'
&& $last_nonblank_type eq 'k'
&& $is_sort_map_grep{$last_nonblank_token} )
# /^(sort|map|grep)$/ )
{
$bond_str = NO_BREAK;
}
# extrude.t: do not break before paren at:
# -l pid_filename(
if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
$bond_str = NO_BREAK;
}
# OLD COMMENT: In older version of perl, use strict can cause
# problems with breaks before bare words following opening parens.
# For example, this will fail under older versions if a break is
# made between '(' and 'MAIL':
# use strict; open( MAIL, "a long filename or command"); close MAIL;
# NEW COMMENT: Third fix for b1213:
# This option does not seem to be needed any longer, and it can
# cause instabilities. It can be turned off, but to minimize
# changes to existing formatting it is retained only in the case
# where the previous token was 'open' and there was no line break.
# Even this could eventually be removed if it causes instability.
if ( $type eq '{' ) {
if ( $token eq '('
&& $next_nonblank_type eq 'w'
&& $last_nonblank_type eq 'k'
&& $last_nonblank_token eq 'open'
&& !$old_breakpoint_to_go[$i] )
{
$bond_str = NO_BREAK;
}
}
# Do not break between a possible filehandle and a ? or / and do
# not introduce a break after it if there is no blank
# (extrude.t)
elsif ( $type eq 'Z' ) {
# don't break..
if (
# if there is no blank and we do not want one. Examples:
# print $x++ # do not break after $x
# print HTML"HELLO" # break ok after HTML
(
$next_type ne 'b'
&& defined( $want_left_space{$next_type} )
&& $want_left_space{$next_type} == WS_NO
)
# or we might be followed by the start of a quote,
# and this is not an existing breakpoint; fixes c039.
|| !$old_breakpoint_to_go[$i]
&& substr( $next_nonblank_token, 0, 1 ) eq '/'
)
{
$bond_str = NO_BREAK;
}
}
else {
## ok - not special
}
# Breaking before a ? before a quote can cause trouble if
# they are not separated by a blank.
# Example: a syntax error occurs if you break before the ? here
# my$logic=join$all?' && ':' || ',@regexps;
# From: Professional_Perl_Programming_Code/multifind.pl
if ( $next_nonblank_type eq '?' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
}
# Breaking before a . followed by a number
# can cause trouble if there is no intervening space
# Example: a syntax error occurs if you break before the .2 here
# $str .= pack($endian.2, ensurrogate($ord));
# From: perl58/Unicode.pm
elsif ( $next_nonblank_type eq '.' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
}
# Fix for c039
elsif ( $type eq 'w' ) {
$bond_str = NO_BREAK
if ( !$old_breakpoint_to_go[$i]
&& substr( $next_nonblank_token, 0, 1 ) eq '/'
&& $next_nonblank_type ne '//' );
}
else {
## ok - not special
}
$bond_str_2 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# End of hardwired rules
#---------------------------------------------------------------
#---------------------------------------------------------------
# Bond Strength Section 3:
# Apply table rules. These have priority over the above
# hardwired rules.
#---------------------------------------------------------------
my $tabulated_bond_str;
my $ltype = $type;
my $rtype = $next_nonblank_type;
if ( $seqno && $is_container_token{$token} ) {
$ltype = $type . $token;
}
if ( $next_nonblank_seqno
&& $is_container_token{$next_nonblank_token} )
{
$rtype = $next_nonblank_type . $next_nonblank_token;
# Alternate Fix #1 for issue b1299. This version makes the
# decision as soon as possible. See Alternate Fix #2 also.
# Do not separate a bareword identifier from its paren: b1299
# This is currently needed for stability because if the bareword
# gets separated from a preceding '->' and following '(' then
# the tokenizer may switch from type 'i' to type 'w'. This
# patch will prevent this by keeping it adjacent to its '('.
## if ( $next_nonblank_token eq '('
## && $ltype eq 'i'
## && substr( $token, 0, 1 ) =~ /^\w$/ )
## {
## $ltype = 'w';
## }
}
# apply binary rules which apply regardless of space between tokens
if ( $binary_bond_strength{$ltype}{$rtype} ) {
$bond_str = $binary_bond_strength{$ltype}{$rtype};
$tabulated_bond_str = $bond_str;
}
# apply binary rules which apply only if no space between tokens
if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
$bond_str = $binary_bond_strength{$ltype}{$next_type};
$tabulated_bond_str = $bond_str;
}
if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
$bond_str = NO_BREAK;
$tabulated_bond_str = $bond_str;
}
$bond_str_3 = $bond_str if (DEBUG_BOND);
# If the hardwired rules conflict with the tabulated bond
# strength then there is an inconsistency that should be fixed
DEBUG_BOND
&& $tabulated_bond_str
&& $bond_str_1
&& $bond_str_1 != $bond_str_2
&& $bond_str_2 != $tabulated_bond_str
&& do {
print {*STDOUT}
"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
};
#-----------------------------------------------------------------
# Bond Strength Section 4:
# Modify strengths of certain tokens which often occur in sequence
# by adding a small bias to each one in turn so that the breaks
# occur from left to right.
#
# Note that we only changing strengths by small amounts here,
# and usually increasing, so we should not be altering any NO_BREAKs.
# Other routines which check for NO_BREAKs will use a tolerance
# of one to avoid any problem.
#-----------------------------------------------------------------
# The bias tables use special keys:
# $type - if not keyword
# $token - if keyword, but map some keywords together
my $left_key =
$type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
my $right_key =
$next_nonblank_type eq 'k'
? $next_nonblank_token eq 'err'
? 'or'
: $next_nonblank_token
: $next_nonblank_type;
# bias left token
if ( defined( $bias{$left_key} ) ) {
if ( !$want_break_before{$left_key} ) {
$bias{$left_key} += $delta_bias;
$bond_str += $bias{$left_key};
}
}
# bias right token
if ( defined( $bias{$right_key} ) ) {
if ( $want_break_before{$right_key} ) {
# for leading '.' align all but 'short' quotes; the idea
# is to not place something like "\n" on a single line.
if ( $right_key eq '.' ) {
my $is_short_quote = $last_nonblank_type eq '.'
&& ( $token_length <=
$rOpts_short_concatenation_item_length )
&& !$is_closing_token{$token};
if ( !$is_short_quote ) {
$bias{$right_key} += $delta_bias;
}
}
else {
$bias{$right_key} += $delta_bias;
}
$bond_str += $bias{$right_key};
}
}
$bond_str_4 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# Bond Strength Section 5:
# Fifth Approximation.
# Take nesting depth into account by adding the nesting depth
# to the bond strength.
#---------------------------------------------------------------
my $strength;
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
if ( $total_nesting_depth > 0 ) {
$strength = $bond_str + $total_nesting_depth;
}
else {
$strength = $bond_str;
}
}
else {
$strength = NO_BREAK;
# For critical code such as lines with here targets we must
# be absolutely sure that we do not allow a break. So for
# these the nobreak flag exceeds 1 as a signal. Otherwise we
# can run into trouble when small tolerances are added.
$strength += 1
if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 );
}
#---------------------------------------------------------------
# Bond Strength Section 6:
# Sixth Approximation. Welds.
#---------------------------------------------------------------
# Do not allow a break within welds
if ( $total_weld_count && $seqno ) {
my $KK = $K_to_go[$i];
if ( $rK_weld_right->{$KK} ) {
$strength = NO_BREAK;
}
# But encourage breaking after opening welded tokens
elsif ($rK_weld_left->{$KK}
&& $is_opening_token{$token} )
{
$strength -= 1;
}
else {
## ok - not welded left or right
}
}
# always break after side comment
if ( $type eq '#' ) { $strength = 0 }
$rbond_strength_to_go->[$i] = $strength;
# Fix for case c001: be sure NO_BREAK's are enforced by later
# routines, except at a '?' because '?' as quote delimiter is
# deprecated.
if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
$nobreak_to_go[$i] ||= 1;
}
DEBUG_BOND && do {
my $str = substr( $token, 0, 15 );
$str .= SPACE x ( 16 - length($str) );
print {*STDOUT}
"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
# reset for next pass
$bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
};
} ## end main loop
return $rbond_strength_to_go;
} ## end sub set_bond_strengths
} ## end closure set_bond_strengths
sub bad_pattern {
my ($pattern) = @_;
# See if a pattern will compile.
# Note: this sub is also called from Tokenizer
my $regex = eval { qr/$pattern/ };
return $EVAL_ERROR;
}
{ ## begin closure prepare_cuddled_block_types
my %no_cuddle;
# Add keywords here which really should not be cuddled
BEGIN {
my @q = qw(if unless for foreach while);
@no_cuddle{@q} = (1) x scalar(@q);
}
sub prepare_cuddled_block_types {
# the cuddled-else style, if used, is controlled by a hash that
# we construct here
# Include keywords here which should not be cuddled
my $cuddled_string = EMPTY_STRING;
if ( $rOpts->{'cuddled-else'} ) {
# set the default
$cuddled_string = 'elsif else continue catch finally'
unless ( $rOpts->{'cuddled-block-list-exclusive'} );
# This is the old equivalent but more complex version
# $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
# Add users other blocks to be cuddled
my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
if ($cuddled_block_list) {
$cuddled_string .= SPACE . $cuddled_block_list;
}
}
# If we have a cuddled string of the form
# 'try-catch-finally'
# we want to prepare a hash of the form
# $rcuddled_block_types = {
# 'try' => {
# 'catch' => 1,
# 'finally' => 1
# },
# };
# use -dcbl to dump this hash
# Multiple such strings are input as a space or comma separated list
# If we get two lists with the same leading type, such as
# -cbl = "-try-catch-finally -try-catch-otherwise"
# then they will get merged as follows:
# $rcuddled_block_types = {
# 'try' => {
# 'catch' => 1,
# 'finally' => 2,
# 'otherwise' => 1,
# },
# };
# This will allow either type of chain to be followed.
$cuddled_string =~ s/,/ /g; # allow space or comma separated lists
my @cuddled_strings = split /\s+/, $cuddled_string;
$rcuddled_block_types = {};
# process each dash-separated string...
my $string_count = 0;
foreach my $string (@cuddled_strings) {
next unless $string;
my @words = split /-+/, $string; # allow multiple dashes
# we could look for and report possible errors here...
next if ( @words <= 0 );
# allow either '-continue' or *-continue' for arbitrary starting type
my $start = '*';
# a single word without dashes is a secondary block type
if ( @words > 1 ) {
$start = shift @words;
}
# always make an entry for the leading word. If none follow, this
# will still prevent a wildcard from matching this word.
if ( !defined( $rcuddled_block_types->{$start} ) ) {
$rcuddled_block_types->{$start} = {};
}
# The count gives the original word order in case we ever want it.
$string_count++;
my $word_count = 0;
foreach my $word (@words) {
next unless $word;
if ( $no_cuddle{$word} ) {
Warn(
"## Ignoring keyword '$word' in -cbl; does not seem right\n"
);
next;
}
$word_count++;
$rcuddled_block_types->{$start}->{$word} =
1; #"$string_count.$word_count";
# git#9: Remove this word from the list of desired one-line
# blocks
$want_one_line_block{$word} = 0;
}
}
return;
} ## end sub prepare_cuddled_block_types
} ## end closure prepare_cuddled_block_types
sub dump_cuddled_block_list {
my ($fh) = @_;
# ORIGINAL METHOD: Here is the format of the cuddled block type hash
# which controls this routine
# my $rcuddled_block_types = {
# 'if' => {
# 'else' => 1,
# 'elsif' => 1
# },
# 'try' => {
# 'catch' => 1,
# 'finally' => 1
# },
# };
# SIMPLIFIED METHOD: the simplified method uses a wildcard for
# the starting block type and puts all cuddled blocks together:
# my $rcuddled_block_types = {
# '*' => {
# 'else' => 1,
# 'elsif' => 1
# 'catch' => 1,
# 'finally' => 1
# },
# };
# Both methods work, but the simplified method has proven to be adequate and
# easier to manage.
my $cuddled_string = $rOpts->{'cuddled-block-list'};
$cuddled_string = EMPTY_STRING unless $cuddled_string;
my $flags = EMPTY_STRING;
$flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
$flags .= " -cbl='$cuddled_string'";
if ( !$rOpts->{'cuddled-else'} ) {
$flags .= "\nNote: You must specify -ce to generate a cuddled hash";
}
$fh->print(<<EOM);
------------------------------------------------------------------------
Hash of cuddled block types prepared for a run with these parameters:
$flags
------------------------------------------------------------------------
EOM
use Data::Dumper;
$fh->print( Dumper($rcuddled_block_types) );
$fh->print(<<EOM);
------------------------------------------------------------------------
EOM
return;
} ## end sub dump_cuddled_block_list
sub make_static_block_comment_pattern {
# create the pattern used to identify static block comments
$static_block_comment_pattern = '^\s*##';
# allow the user to change it
if ( $rOpts->{'static-block-comment-prefix'} ) {
my $prefix = $rOpts->{'static-block-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = $prefix;
# user may give leading caret to force matching left comments only
if ( $prefix !~ /^\^#/ ) {
if ( $prefix !~ /^#/ ) {
Die(
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
);
}
$pattern = '^\s*' . $prefix;
}
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$static_block_comment_pattern = $pattern;
}
return;
} ## end sub make_static_block_comment_pattern
sub make_format_skipping_pattern {
my ( $opt_name, $default ) = @_;
my $param = $rOpts->{$opt_name};
if ( !$param ) { $param = $default }
$param =~ s/^\s*//;
if ( $param !~ /^#/ ) {
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
}
my $pattern = '^' . $param . '\s';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
);
}
return $pattern;
} ## end sub make_format_skipping_pattern
sub make_non_indenting_brace_pattern {
# Create the pattern used to identify static side comments.
# Note that we are ending the pattern in a \s. This will allow
# the pattern to be followed by a space and some text, or a newline.
# The pattern is used in sub 'non_indenting_braces'
$non_indenting_brace_pattern = '^#<<<\s';
# allow the user to change it
if ( $rOpts->{'non-indenting-brace-prefix'} ) {
my $prefix = $rOpts->{'non-indenting-brace-prefix'};
$prefix =~ s/^\s*//;
if ( $prefix !~ /^#/ ) {
Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
}
my $pattern = '^' . $prefix . '\s';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$non_indenting_brace_pattern = $pattern;
}
return;
} ## end sub make_non_indenting_brace_pattern
sub make_closing_side_comment_list_pattern {
# turn any input list into a regex for recognizing selected block types
$closing_side_comment_list_pattern = '^\w+';
if ( defined( $rOpts->{'closing-side-comment-list'} )
&& $rOpts->{'closing-side-comment-list'} )
{
$closing_side_comment_list_pattern =
make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
}
return;
} ## end sub make_closing_side_comment_list_pattern
sub make_sub_matching_pattern {
# Patterns for standardizing matches to block types for regular subs and
# anonymous subs. Examples
# 'sub process' is a named sub
# 'sub ::m' is a named sub
# 'sub' is an anonymous sub
# 'sub:' is a label, not a sub
# 'sub :' is a label, not a sub ( block type will be <sub:> )
# sub'_ is a named sub ( block type will be <sub '_> )
# 'substr' is a keyword
# So note that named subs always have a space after 'sub'
$SUB_PATTERN = '^sub\s'; # match normal sub
$ASUB_PATTERN = '^sub$'; # match anonymous sub
%matches_ASUB = ( 'sub' => 1 );
# Fix the patterns to include any sub aliases:
# Note that any 'sub-alias-list' has been preprocessed to
# be a trimmed, space-separated list which includes 'sub'
# for example, it might be 'sub method fun'
my @words;
my $sub_alias_list = $rOpts->{'sub-alias-list'};
if ($sub_alias_list) {
@words = split /\s+/, $sub_alias_list;
}
else {
push @words, 'sub';
}
# add 'method' unless use-feature='noclass' is set.
if ( !defined( $rOpts->{'use-feature'} )
|| $rOpts->{'use-feature'} !~ /\bnoclass\b/ )
{
push @words, 'method';
}
# Note (see also RT #133130): These patterns are used by
# sub make_block_pattern, which is used for making most patterns.
# So this sub needs to be called before other pattern-making routines.
if ( @words > 1 ) {
# Two ways are provided to match an anonymous sub:
# $ASUB_PATTERN - with a regex (old method, slow)
# %matches_ASUB - with a hash lookup (new method, faster)
@matches_ASUB{@words} = (1) x scalar(@words);
my $alias_list = join '|', keys %matches_ASUB;
$SUB_PATTERN =~ s/sub/\($alias_list\)/;
$ASUB_PATTERN =~ s/sub/\($alias_list\)/;
}
return;
} ## end sub make_sub_matching_pattern
sub make_bl_pattern {
# Set defaults lists to retain historical default behavior for -bl:
my $bl_list_string = '*';
my $bl_exclusion_list_string = 'sort map grep eval asub';
if ( defined( $rOpts->{'brace-left-list'} )
&& $rOpts->{'brace-left-list'} )
{
$bl_list_string = $rOpts->{'brace-left-list'};
}
if ( $bl_list_string =~ /\bsub\b/ ) {
$rOpts->{'opening-sub-brace-on-new-line'} ||=
$rOpts->{'opening-brace-on-new-line'};
}
if ( $bl_list_string =~ /\basub\b/ ) {
$rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
$rOpts->{'opening-brace-on-new-line'};
}
$bl_pattern = make_block_pattern( '-bll', $bl_list_string );
# for -bl, a list with '*' turns on -sbl and -asbl
if ( $bl_pattern =~ /\.\*/ ) {
$rOpts->{'opening-sub-brace-on-new-line'} ||=
$rOpts->{'opening-brace-on-new-line'};
$rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
$rOpts->{'opening-anonymous-brace-on-new-line'};
}
if ( defined( $rOpts->{'brace-left-exclusion-list'} )
&& $rOpts->{'brace-left-exclusion-list'} )
{
$bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
$rOpts->{'opening-sub-brace-on-new-line'} = 0;
}
if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
$rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
}
}
$bl_exclusion_pattern =
make_block_pattern( '-blxl', $bl_exclusion_list_string );
return;
} ## end sub make_bl_pattern
sub make_bli_pattern {
# default list of block types for which -bli would apply
my $bli_list_string = 'if else elsif unless while for foreach do : sub';
my $bli_exclusion_list_string = SPACE;
if ( defined( $rOpts->{'brace-left-and-indent-list'} )
&& $rOpts->{'brace-left-and-indent-list'} )
{
$bli_list_string = $rOpts->{'brace-left-and-indent-list'};
}
$bli_pattern = make_block_pattern( '-blil', $bli_list_string );
if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
&& $rOpts->{'brace-left-and-indent-exclusion-list'} )
{
$bli_exclusion_list_string =
$rOpts->{'brace-left-and-indent-exclusion-list'};
}
$bli_exclusion_pattern =
make_block_pattern( '-blixl', $bli_exclusion_list_string );
return;
} ## end sub make_bli_pattern
sub make_keyword_group_list_pattern {
# turn any input list into a regex for recognizing selected block types.
# Here are the defaults:
$keyword_group_list_pattern = '^(our|local|my|use|require|)$';
$keyword_group_list_comment_pattern = EMPTY_STRING;
if ( defined( $rOpts->{'keyword-group-blanks-list'} )
&& $rOpts->{'keyword-group-blanks-list'} )
{
my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
my @keyword_list;
my @comment_list;
foreach my $word (@words) {
if ( $word eq 'BC' || $word eq 'SBC' ) {
push @comment_list, $word;
if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
}
else {
push @keyword_list, $word;
}
}
$keyword_group_list_pattern =
make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
$keyword_group_list_comment_pattern =
make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
}
return;
} ## end sub make_keyword_group_list_pattern
sub make_block_brace_vertical_tightness_pattern {
# turn any input list into a regex for recognizing selected block types
$block_brace_vertical_tightness_pattern =
'^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
&& $rOpts->{'block-brace-vertical-tightness-list'} )
{
$block_brace_vertical_tightness_pattern =
make_block_pattern( '-bbvtl',
$rOpts->{'block-brace-vertical-tightness-list'} );
}
return;
} ## end sub make_block_brace_vertical_tightness_pattern
sub make_blank_line_pattern {
$blank_lines_before_closing_block_pattern = $SUB_PATTERN;
my $key = 'blank-lines-before-closing-block-list';
if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
$blank_lines_before_closing_block_pattern =
make_block_pattern( '-blbcl', $rOpts->{$key} );
}
$blank_lines_after_opening_block_pattern = $SUB_PATTERN;
$key = 'blank-lines-after-opening-block-list';
if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
$blank_lines_after_opening_block_pattern =
make_block_pattern( '-blaol', $rOpts->{$key} );
}
return;
} ## end sub make_blank_line_pattern
sub make_block_pattern {
# given a string of block-type keywords, return a regex to match them
# The only tricky part is that labels are indicated with a single ':'
# and the 'sub' token text may have additional text after it (name of
# sub).
#
# Example:
#
# input string: "if else elsif unless while for foreach do : sub";
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
# Minor Update:
#
# To distinguish between anonymous subs and named subs, use 'sub' to
# indicate a named sub, and 'asub' to indicate an anonymous sub
my ( $abbrev, $string ) = @_;
my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
next if $seen{$i};
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
elsif ( $i eq 'asub' ) {
}
elsif ( $i eq ';' ) {
push @words, ';';
}
elsif ( $i eq '{' ) {
push @words, '\{';
}
elsif ( $i eq ':' ) {
push @words, '\w+:';
}
elsif ( $i =~ /^\w/ ) {
push @words, $i;
}
else {
Warn("unrecognized block type $i after $abbrev, ignoring\n");
}
}
# Fix 2 for c091, prevent the pattern from matching an empty string
# '1 ' is an impossible block name.
if ( !@words ) { push @words, "1 " }
my $pattern = '(' . join( '|', @words ) . ')$';
my $sub_patterns = EMPTY_STRING;
if ( $seen{'sub'} ) {
$sub_patterns .= '|' . $SUB_PATTERN;
}
if ( $seen{'asub'} ) {
$sub_patterns .= '|' . $ASUB_PATTERN;
}
if ($sub_patterns) {
$pattern = '(' . $pattern . $sub_patterns . ')';
}
$pattern = '^' . $pattern;
return $pattern;
} ## end sub make_block_pattern
sub make_static_side_comment_pattern {
# create the pattern used to identify static side comments
$static_side_comment_pattern = '^##';
# allow the user to change it
if ( $rOpts->{'static-side-comment-prefix'} ) {
my $prefix = $rOpts->{'static-side-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = '^' . $prefix;
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$static_side_comment_pattern = $pattern;
}
return;
} ## end sub make_static_side_comment_pattern
sub make_closing_side_comment_prefix {
# Be sure we have a valid closing side comment prefix
my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
my $csc_prefix_pattern;
if ( !defined($csc_prefix) ) {
$csc_prefix = '## end';
$csc_prefix_pattern = '^##\s+end';
}
else {
my $test_csc_prefix = $csc_prefix;
if ( $test_csc_prefix !~ /^#/ ) {
$test_csc_prefix = '#' . $test_csc_prefix;
}
# make a regex to recognize the prefix
my $test_csc_prefix_pattern = $test_csc_prefix;
# escape any special characters
$test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
$test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
# allow exact number of intermediate spaces to vary
$test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
# make sure we have a good pattern
# if we fail this we probably have an error in escaping
# characters.
if ( bad_pattern($test_csc_prefix_pattern) ) {
# shouldn't happen..must have screwed up escaping, above
if (DEVEL_MODE) {
Fault(<<EOM);
Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
EOM
}
# just warn and keep going with defaults
Warn(
"Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
);
Warn("Please consider using a simpler -cscp prefix\n");
Warn("Using default -cscp instead; please check output\n");
}
else {
$csc_prefix = $test_csc_prefix;
$csc_prefix_pattern = $test_csc_prefix_pattern;
}
}
$rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
$closing_side_comment_prefix_pattern = $csc_prefix_pattern;
return;
} ## end sub make_closing_side_comment_prefix
##################################################
# CODE SECTION 4: receive lines from the tokenizer
##################################################
{ ## begin closure write_line
my $nesting_depth;
# Variables used by sub check_sequence_numbers:
my $last_seqno;
my %saw_opening_seqno;
my %saw_closing_seqno;
my $initial_seqno;
sub initialize_write_line {
$nesting_depth = undef;
$last_seqno = SEQ_ROOT;
%saw_opening_seqno = ();
%saw_closing_seqno = ();
return;
} ## end sub initialize_write_line
sub check_sequence_numbers {
# Routine for checking sequence numbers. This only needs to be
# done occasionally in DEVEL_MODE to be sure everything is working
# correctly.
my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
my $jmax = @{$rtokens} - 1;
return if ( $jmax < 0 );
foreach my $j ( 0 .. $jmax ) {
my $seqno = $rtype_sequence->[$j];
my $token = $rtokens->[$j];
my $type = $rtoken_type->[$j];
$seqno = EMPTY_STRING unless ( defined($seqno) );
my $err_msg =
"Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
if ( !$seqno ) {
# Sequence numbers are generated for opening tokens, so every opening
# token should be sequenced. Closing tokens will be unsequenced
# if they do not have a matching opening token.
if ( $is_opening_sequence_token{$token}
&& $type ne 'q'
&& $type ne 'Q' )
{
Fault(
<<EOM
$err_msg Unexpected opening token without sequence number
EOM
);
}
}
else {
# Save starting seqno to identify sequence method:
# New method starts with 2 and has continuous numbering
# Old method starts with >2 and may have gaps
if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
if ( $is_opening_sequence_token{$token} ) {
# New method should have continuous numbering
if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
Fault(
<<EOM
$err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
EOM
);
}
$last_seqno = $seqno;
# Numbers must be unique
if ( $saw_opening_seqno{$seqno} ) {
my $lno = $saw_opening_seqno{$seqno};
Fault(
<<EOM
$err_msg Already saw an opening tokens at line $lno with this sequence number
EOM
);
}
$saw_opening_seqno{$seqno} = $input_line_no;
}
# only one closing item per seqno
elsif ( $is_closing_sequence_token{$token} ) {
if ( $saw_closing_seqno{$seqno} ) {
my $lno = $saw_closing_seqno{$seqno};
Fault(
<<EOM
$err_msg Already saw a closing token with this seqno at line $lno
EOM
);
}
$saw_closing_seqno{$seqno} = $input_line_no;
# Every closing seqno must have an opening seqno
if ( !$saw_opening_seqno{$seqno} ) {
Fault(
<<EOM
$err_msg Saw a closing token but no opening token with this seqno
EOM
);
}
}
# Sequenced items must be opening or closing
else {
Fault(
<<EOM
$err_msg Unexpected token type with a sequence number
EOM
);
}
}
}
return;
} ## end sub check_sequence_numbers
sub store_block_type {
my ( $self, $block_type, $seqno ) = @_;
return if ( !$block_type );
# Save the type of a block in a hash using sequence number as key
$self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
# and save named subs and anynymous subs in separate hashes so that
# we only have to do the pattern tests once.
if ( $matches_ASUB{$block_type} ) {
$self->[_ris_asub_block_]->{$seqno} = 1;
}
elsif ( $block_type =~ /$SUB_PATTERN/ ) {
$self->[_ris_sub_block_]->{$seqno} = 1;
}
else {
## ok - not a sub
}
return;
} ## end sub store_block_type
# hash keys which are common to old and new line_of_tokens
my @common_keys;
BEGIN {
@common_keys = qw(
_curly_brace_depth
_ending_in_quote
_guessed_indentation_level
_line_number
_line_text
_line_type
_paren_depth
_quote_character
_square_bracket_depth
_starting_in_quote
);
}
sub write_line {
# This routine receives lines one-by-one from the tokenizer and stores
# them in a format suitable for further processing. After the last
# line has been sent, the tokenizer will call sub 'finish_formatting'
# to do the actual formatting.
my ( $self, $line_of_tokens_old ) = @_;
my $rLL = $self->[_rLL_];
my $line_of_tokens = {};
# copy common hash key values
@{$line_of_tokens}{@common_keys} = @{$line_of_tokens_old}{@common_keys};
my $line_type = $line_of_tokens_old->{_line_type};
my $tee_output;
my $Klimit = $self->[_Klimit_];
my $Kfirst;
# Handle line of non-code
if ( $line_type ne 'CODE' ) {
$tee_output ||= $rOpts_tee_pod
&& substr( $line_type, 0, 3 ) eq 'POD';
$line_of_tokens->{_level_0} = 0;
$line_of_tokens->{_ci_level_0} = 0;
$line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
$line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
$line_of_tokens->{_ended_in_blank_token} = undef;
}
# Handle line of code
else {
my $rtokens = $line_of_tokens_old->{_rtokens};
my $jmax = @{$rtokens} - 1;
if ( $jmax >= 0 ) {
$Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
#----------------------------
# get the tokens on this line
#----------------------------
$self->write_line_inner_loop( $line_of_tokens_old,
$line_of_tokens );
# update Klimit for added tokens
$Klimit = @{$rLL} - 1;
} ## end if ( $jmax >= 0 )
else {
# blank line
$line_of_tokens->{_level_0} = 0;
$line_of_tokens->{_ci_level_0} = 0;
$line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
$line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
$line_of_tokens->{_ended_in_blank_token} = undef;
}
$tee_output ||=
$rOpts_tee_block_comments
&& $jmax == 0
&& $rLL->[$Kfirst]->[_TYPE_] eq '#';
$tee_output ||=
$rOpts_tee_side_comments
&& defined($Kfirst)
&& $Klimit > $Kfirst
&& $rLL->[$Klimit]->[_TYPE_] eq '#';
} ## end if ( $line_type eq 'CODE')
# Finish storing line variables
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
$self->[_Klimit_] = $Klimit;
my $rlines = $self->[_rlines_];
push @{$rlines}, $line_of_tokens;
if ($tee_output) {
my $fh_tee = $self->[_fh_tee_];
my $line_text = $line_of_tokens_old->{_line_text};
$fh_tee->print($line_text) if ($fh_tee);
}
return;
} ## end sub write_line
sub write_line_inner_loop {
my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
#---------------------------------------------------------------------
# Copy the tokens on one line received from the tokenizer to their new
# storage locations.
#---------------------------------------------------------------------
# Input parameters:
# $line_of_tokens_old = line received from tokenizer
# $line_of_tokens = line of tokens being formed for formatter
my $rtokens = $line_of_tokens_old->{_rtokens};
my $jmax = @{$rtokens} - 1;
if ( $jmax < 0 ) {
# safety check; shouldn't happen
DEVEL_MODE && Fault("unexpected jmax=$jmax\n");
return;
}
my $line_index = $line_of_tokens_old->{_line_number} - 1;
my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
my $rblock_type = $line_of_tokens_old->{_rblock_type};
my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
my $rlevels = $line_of_tokens_old->{_rlevels};
my $rLL = $self->[_rLL_];
my $rSS = $self->[_rSS_];
my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
DEVEL_MODE
&& check_sequence_numbers( $rtokens, $rtoken_type,
$rtype_sequence, $line_index + 1 );
# Find the starting nesting depth ...
# It must be the value of variable 'level' of the first token
# because the nesting depth is used as a token tag in the
# vertical aligner and is compared to actual levels.
# So vertical alignment problems will occur with any other
# starting value.
if ( !defined($nesting_depth) ) {
$nesting_depth = $rlevels->[0];
$nesting_depth = 0 if ( $nesting_depth < 0 );
$rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
}
my $j = -1;
# NOTE: coding efficiency is critical in this loop over all tokens
foreach my $token ( @{$rtokens} ) {
# NOTE: Do not clip the 'level' variable yet if it is negative. We
# will do that later, in sub 'store_token_to_go'. The reason is
# that in files with level errors, the logic in 'weld_cuddled_else'
# uses a stack logic that will give bad welds if we clip levels
# here. (A recent update will probably not even allow negative
# levels to arrive here any longer).
my $seqno = EMPTY_STRING;
# Handle tokens with sequence numbers ...
# note the ++ increment hidden here for efficiency
if ( $rtype_sequence->[ ++$j ] ) {
$seqno = $rtype_sequence->[$j];
my $sign = 1;
if ( $is_opening_token{$token} ) {
$self->[_K_opening_container_]->{$seqno} = @{$rLL};
$rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
$nesting_depth++;
# Save a sequenced block type at its opening token.
# Note that unsequenced block types can occur in
# unbalanced code with errors but are ignored here.
$self->store_block_type( $rblock_type->[$j], $seqno )
if ( $rblock_type->[$j] );
}
elsif ( $is_closing_token{$token} ) {
# The opening depth should always be defined, and
# it should equal $nesting_depth-1. To protect
# against unforseen error conditions, however, we
# will check this and fix things if necessary. For
# a test case see issue c055.
my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
if ( !defined($opening_depth) ) {
$opening_depth = $nesting_depth - 1;
$opening_depth = 0 if ( $opening_depth < 0 );
$rdepth_of_opening_seqno->[$seqno] = $opening_depth;
# This is not fatal but should not happen. The
# tokenizer generates sequence numbers
# incrementally upon encountering each new
# opening token, so every positive sequence
# number should correspond to an opening token.
DEVEL_MODE && Fault(<<EOM);
No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
EOM
}
$self->[_K_closing_container_]->{$seqno} = @{$rLL};
$nesting_depth = $opening_depth;
$sign = -1;
}
elsif ( $token eq '?' ) {
$self->[_K_opening_ternary_]->{$seqno} = @{$rLL};
}
elsif ( $token eq ':' ) {
$sign = -1;
$self->[_K_closing_ternary_]->{$seqno} = @{$rLL};
}
# The only sequenced types output by the tokenizer are
# the opening & closing containers and the ternary
# types. So we would only get here if the tokenizer has
# been changed to mark some other tokens with sequence
# numbers, or if an error has been introduced in a
# hash such as %is_opening_container
else {
DEVEL_MODE && Fault(<<EOM);
Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
EOM
}
if ( $sign > 0 ) {
$self->[_Iss_opening_]->[$seqno] = @{$rSS};
# For efficiency, we find the maximum level of
# opening tokens of any type. The actual maximum
# level will be that of their contents which is 1
# greater. That will be fixed in sub
# 'finish_formatting'.
my $level = $rlevels->[$j];
if ( $level > $self->[_maximum_level_] ) {
$self->[_maximum_level_] = $level;
$self->[_maximum_level_at_line_] = $line_index + 1;
}
}
else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} }
push @{$rSS}, $sign * $seqno;
}
# Here we are storing the first five variables per token. The
# remaining token variables will be added later as follows:
# _TOKEN_LENGTH_ is added by sub store_token
# _CUMULATIVE_LENGTH_ is added by sub store_token
# _KNEXT_SEQ_ITEM_ is added by sub respace_post_loop_ops
# _CI_LEVEL_ is added by sub set_ci
# So all token variables are available for use after sub set_ci.
my @tokary;
$tokary[_TOKEN_] = $token;
$tokary[_TYPE_] = $rtoken_type->[$j];
$tokary[_TYPE_SEQUENCE_] = $seqno;
$tokary[_LEVEL_] = $rlevels->[$j];
$tokary[_LINE_INDEX_] = $line_index;
push @{$rLL}, \@tokary;
} ## end token loop
# Need to remember if we can trim the input line
$line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
# Values needed by Logger
$line_of_tokens->{_level_0} = $rlevels->[0];
$line_of_tokens->{_ci_level_0} = 0; # sub set_ci will fix this
$line_of_tokens->{_nesting_blocks_0} =
$line_of_tokens_old->{_nesting_blocks_0};
$line_of_tokens->{_nesting_tokens_0} =
$line_of_tokens_old->{_nesting_tokens_0};
return;
} ## end sub write_line_inner_loop
} ## end closure write_line
#############################################
# CODE SECTION 5: Pre-process the entire file
#############################################
sub finish_formatting {
my ( $self, $severe_error ) = @_;
# The file has been tokenized and is ready to be formatted.
# All of the relevant data is stored in $self, ready to go.
# Returns:
# true if input file was copied verbatim due to errors
# false otherwise
# Some of the code in sub break_lists is not robust enough to process code
# with arbitrary brace errors. The simplest fix is to just return the file
# verbatim if there are brace errors. This fixes issue c160.
$severe_error ||= get_saw_brace_error();
# Check the maximum level. If it is extremely large we will give up and
# output the file verbatim. Note that the actual maximum level is 1
# greater than the saved value, so we fix that here.
$self->[_maximum_level_] += 1;
my $maximum_level = $self->[_maximum_level_];
my $maximum_table_index = $#maximum_line_length_at_level;
if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
$severe_error ||= 1;
Warn(<<EOM);
The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
Something may be wrong; formatting will be skipped.
EOM
}
# Dump any requested block summary data
if ( $rOpts->{'dump-block-summary'} ) {
if ($severe_error) { Exit(1) }
$self->dump_block_summary();
Exit(0);
}
# output file verbatim if severe error or no formatting requested
if ( $severe_error || $rOpts->{notidy} ) {
$self->dump_verbatim();
$self->wrapup($severe_error);
return 1;
}
# Update the 'save_logfile' flag based to include any tokenization errors.
# We can save time by skipping logfile calls if it is not going to be saved.
my $logger_object = $self->[_logger_object_];
if ($logger_object) {
my $save_logfile = $logger_object->get_save_logfile();
$self->[_save_logfile_] = $save_logfile;
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->set_save_logfile($save_logfile);
}
{
my $rix_side_comments = $self->set_CODE_type();
$self->find_non_indenting_braces($rix_side_comments);
# Handle any requested side comment deletions. It is easier to get
# this done here rather than farther down the pipeline because IO
# lines take a different route, and because lines with deleted HSC
# become BL lines. We have already handled any tee requests in sub
# getline, so it is safe to delete side comments now.
$self->delete_side_comments($rix_side_comments)
if ( $rOpts_delete_side_comments
|| $rOpts_delete_closing_side_comments );
}
# Verify that the line hash does not have any unknown keys.
$self->check_line_hashes() if (DEVEL_MODE);
{
# Make a pass through all tokens, adding or deleting any whitespace as
# required. Also make any other changes, such as adding semicolons.
# All token changes must be made here so that the token data structure
# remains fixed for the rest of this iteration.
my ( $error, $rqw_lines ) = $self->respace_tokens();
if ($error) {
$self->dump_verbatim();
$self->wrapup();
return 1;
}
# sub 'set_ci' is called after sub respace to allow use of type counts
# Token variable _CI_LEVEL_ is only defined after this call
$self->set_ci();
$self->find_multiline_qw($rqw_lines);
}
$self->examine_vertical_tightness_flags();
$self->set_excluded_lp_containers();
$self->keep_old_line_breaks();
# Implement any welding needed for the -wn or -cb options
$self->weld_containers();
# Collect info needed to implement the -xlp style
$self->xlp_collapsed_lengths()
if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
# Locate small nested blocks which should not be broken
$self->mark_short_nested_blocks();
$self->special_indentation_adjustments();
# Verify that the main token array looks OK. If this ever causes a fault
# then place similar checks before the sub calls above to localize the
# problem.
$self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
# Finishes formatting and write the result to the line sink.
# Eventually this call should just change the 'rlines' data according to the
# new line breaks and then return so that we can do an internal iteration
# before continuing with the next stages of formatting.
$self->process_all_lines();
# A final routine to tie up any loose ends
$self->wrapup();
return;
} ## end sub finish_formatting
my %is_loop_type;
BEGIN {
my @q = qw( for foreach while do until );
@{is_loop_type}{@q} = (1) x scalar(@q);
}
sub find_level_info {
# Find level ranges and total variations of all code blocks in this file.
# Returns:
# ref to hash with block info, with seqno as key (see below)
my ($self) = @_;
# The array _rSS_ has the complete container tree for this file.
my $rSS = $self->[_rSS_];
# We will be ignoring everything except code block containers
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my @stack;
my %level_info;
# TREE_LOOP:
foreach my $sseq ( @{$rSS} ) {
my $stack_depth = @stack;
my $seq_next = $sseq > 0 ? $sseq : -$sseq;
next if ( !$rblock_type_of_seqno->{$seq_next} );
if ( $sseq > 0 ) {
# STACK_LOOP:
my $item;
foreach my $seq (@stack) {
$item = $level_info{$seq};
if ( $item->{maximum_depth} < $stack_depth ) {
$item->{maximum_depth} = $stack_depth;
}
$item->{block_count}++;
} ## end STACK LOOP
push @stack, $seq_next;
my $block_type = $rblock_type_of_seqno->{$seq_next};
# If this block is a loop nested within a loop, then we
# will mark it as an 'inner_loop'. This is a useful
# complexity measure.
my $is_inner_loop = 0;
if ( $is_loop_type{$block_type} && defined($item) ) {
$is_inner_loop = $is_loop_type{ $item->{block_type} };
}
$level_info{$seq_next} = {
starting_depth => $stack_depth,
maximum_depth => $stack_depth,
block_count => 1,
block_type => $block_type,
is_inner_loop => $is_inner_loop,
};
}
else {
my $seq_test = pop @stack;
# error check
if ( $seq_test != $seq_next ) {
# Shouldn't happen - the $rSS array must have an error
DEVEL_MODE && Fault("stack error finding total depths\n");
%level_info = ();
last;
}
}
} ## end TREE_LOOP
return \%level_info;
} ## end sub find_level_info
sub find_loop_label {
my ( $self, $seqno ) = @_;
# Given:
# $seqno = sequence number of a block of code for a loop
# Return:
# $label = the loop label text, if any, or an empty string
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $label = EMPTY_STRING;
my $K_opening = $K_opening_container->{$seqno};
# backup to the line with the opening paren, if any, in case the
# keyword is on a different line
my $Kp = $self->K_previous_code($K_opening);
return $label unless ( defined($Kp) );
if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) {
$seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
$K_opening = $K_opening_container->{$seqno};
}
return $label unless ( defined($K_opening) );
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
# look for a label within a few lines; allow a couple of blank lines
foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) {
last if ( $lx < 0 );
my $line_of_tokens = $rlines->[$lx];
my $line_type = $line_of_tokens->{_line_type};
# stop search on a non-code line
last if ( $line_type ne 'CODE' );
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
# skip a blank line
next if ( !defined($Kfirst) );
# check for a lable
if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) {
$label = $rLL->[$Kfirst]->[_TOKEN_];
last;
}
# quit the search if we are above the starting line
last if ( $lx < $lx_open );
}
return $label;
} ## end sub find_loop_label
{ ## closure find_mccabe_count
my %is_mccabe_logic_keyword;
my %is_mccabe_logic_operator;
BEGIN {
my @q = (qw( && || ||= &&= ? <<= >>= ));
@is_mccabe_logic_operator{@q} = (1) x scalar(@q);
@q = (qw( and or xor if else elsif unless until while for foreach ));
@is_mccabe_logic_keyword{@q} = (1) x scalar(@q);
} ## end BEGIN
sub find_mccabe_count {
my ($self) = @_;
# Find the cumulative mccabe count to each token
# Return '$rmccabe_count_sum' = ref to array with cumulative
# mccabe count to each token $K
# NOTE: This sub currently follows the definitions in Perl::Critic
my $rmccabe_count_sum;
my $rLL = $self->[_rLL_];
my $count = 0;
my $Klimit = $self->[_Klimit_];
foreach my $KK ( 0 .. $Klimit ) {
$rmccabe_count_sum->{$KK} = $count;
my $type = $rLL->[$KK]->[_TYPE_];
if ( $type eq 'k' ) {
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $is_mccabe_logic_keyword{$token} ) { $count++ }
}
else {
if ( $is_mccabe_logic_operator{$type} ) {
$count++;
}
}
}
$rmccabe_count_sum->{ $Klimit + 1 } = $count;
return $rmccabe_count_sum;
} ## end sub find_mccabe_count
} ## end closure find_mccabe_count
sub find_code_line_count {
my ($self) = @_;
# Find the cumulative number of lines of code, excluding blanks,
# comments and pod.
# Return '$rcode_line_count' = ref to array with cumulative
# code line count for each input line number.
my $rcode_line_count;
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $ix_line = -1;
my $code_line_count = 0;
# loop over all lines
foreach my $line_of_tokens ( @{$rlines} ) {
$ix_line++;
# what type of line?
my $line_type = $line_of_tokens->{_line_type};
# if 'CODE' it must be non-blank and non-comment
if ( $line_type eq 'CODE' ) {
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( defined($Kfirst) ) {
# it is non-blank
my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
# ok, it is a non-comment
$code_line_count++;
}
}
}
# Count all other special line types except pod;
# For a list of line types see sub 'process_all_lines'
else {
if ( $line_type !~ /^POD/ ) { $code_line_count++ }
}
# Store the cumulative count using the input line index
$rcode_line_count->[$ix_line] = $code_line_count;
}
return $rcode_line_count;
} ## end sub find_code_line_count
sub find_selected_packages {
my ( $self, $rdump_block_types ) = @_;
# returns a list of all selected package statements in a file
my @package_list;
if ( !$rdump_block_types->{'*'}
&& !$rdump_block_types->{'package'}
&& !$rdump_block_types->{'class'} )
{
return \@package_list;
}
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $rlines = $self->[_rlines_];
my $K_closing_container = $self->[_K_closing_container_];
my @package_sweep;
foreach my $KK ( 0 .. $Klimit ) {
my $item = $rLL->[$KK];
my $type = $item->[_TYPE_];
# fix for c250: package type has changed from 'i' to 'P'
next if ( $type ne 'P' );
my $token = $item->[_TOKEN_];
if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/
|| substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ )
{
$token =~ s/\s+/ /g;
my ( $keyword, $name ) = split /\s+/, $token, 2;
my $lx_start = $item->[_LINE_INDEX_];
my $level = $item->[_LEVEL_];
my $parent_seqno = $self->parent_seqno_by_K($KK);
# Skip a class BLOCK because it will be handled as a block
if ( $keyword eq 'class' ) {
my $line_of_tokens = $rlines->[$lx_start];
my $rK_range = $line_of_tokens->{_rK_range};
my ( $K_first, $K_last ) = @{$rK_range};
if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
$K_last = $self->K_previous_code($K_last);
}
if ( defined($K_last) ) {
my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_];
my $block_type_next =
$self->[_rblock_type_of_seqno_]->{$seqno_class};
# these block types are currently marked 'package'
# but may be 'class' in the future, so allow both.
if ( defined($block_type_next)
&& $block_type_next =~ /^(class|package)\b/ )
{
next;
}
}
}
my $K_closing = $Klimit;
if ( $parent_seqno != SEQ_ROOT ) {
my $Kc = $K_closing_container->{$parent_seqno};
if ( defined($Kc) ) {
$K_closing = $Kc;
}
}
# This package ends any previous package at this level
if ( defined( my $ix = $package_sweep[$level] ) ) {
my $rpk = $package_list[$ix];
my $Kc = $rpk->{K_closing};
if ( $Kc > $KK ) {
$rpk->{K_closing} = $KK - 1;
}
}
$package_sweep[$level] = @package_list;
# max_change and block_count are not currently reported 'package'
push @package_list,
{
line_start => $lx_start + 1,
K_opening => $KK,
K_closing => $Klimit,
name => $name,
type => $keyword,
level => $level,
max_change => 0,
block_count => 0,
};
}
}
return \@package_list;
} ## end sub find_selected_packages
sub find_selected_blocks {
my ( $self, $rdump_block_types ) = @_;
# Find blocks needed for --dump-block-summary
# Returns:
# $rslected_blocks = ref to a list of information on the selected blocks
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $ris_sub_block = $self->[_ris_sub_block_];
my $dump_all_types = $rdump_block_types->{'*'};
# Get level variation info for code blocks
my $rlevel_info = $self->find_level_info();
my @selected_blocks;
#---------------------------------------------------
# BEGIN loop over all blocks to find selected blocks
#---------------------------------------------------
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $type;
my $name = EMPTY_STRING;
my $block_type = $rblock_type_of_seqno->{$seqno};
my $K_opening = $K_opening_container->{$seqno};
my $K_closing = $K_closing_container->{$seqno};
my $level = $rLL->[$K_opening]->[_LEVEL_];
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
my $line_of_tokens = $rlines->[$lx_open];
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) {
my $line_type = $line_of_tokens->{_line_type};
# shouldn't happen
my $CODE_type = $line_of_tokens->{_code_type};
DEVEL_MODE && Fault(<<EOM);
unexpected line_type=$line_type at line $lx_open, code type=$CODE_type
EOM
next;
}
my ( $max_change, $block_count, $inner_loop_plus ) =
( 0, 0, EMPTY_STRING );
my $item = $rlevel_info->{$seqno};
if ( defined($item) ) {
my $starting_depth = $item->{starting_depth};
my $maximum_depth = $item->{maximum_depth};
$block_count = $item->{block_count};
$max_change = $maximum_depth - $starting_depth + 1;
# this is a '+' character if this block is an inner loops
$inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING;
}
# Skip closures unless type 'closure' is explicitly requested
if ( ( $block_type eq '}' || $block_type eq ';' )
&& $rdump_block_types->{'closure'} )
{
$type = 'closure';
}
# Both 'sub' and 'asub' select an anonymous sub.
# This allows anonymous subs to be explicitely selected
elsif (
$ris_asub_block->{$seqno}
&& ( $dump_all_types
|| $rdump_block_types->{'sub'}
|| $rdump_block_types->{'asub'} )
)
{
$type = 'asub';
# Look back to try to find some kind of name, such as
# my $var = sub { - var is type 'i'
# var => sub { - var is type 'w'
# -var => sub { - var is type 'w'
# 'var' => sub { - var is type 'Q'
my ( $saw_equals, $saw_fat_comma, $blank_count );
foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) {
my $token_type = $rLL->[$KK]->[_TYPE_];
if ( $token_type eq 'b' ) { $blank_count++; next }
if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
if ( $token_type eq '=' ) { $saw_equals++; next }
if ( $token_type eq 'i' && $saw_equals
|| ( $token_type eq 'w' || $token_type eq 'Q' )
&& $saw_fat_comma )
{
$name = $rLL->[$KK]->[_TOKEN_];
last;
}
}
}
elsif ( $ris_sub_block->{$seqno}
&& ( $dump_all_types || $rdump_block_types->{'sub'} ) )
{
$type = 'sub';
# what we want:
# $block_type $name
# 'sub setidentifier($)' => 'setidentifier'
# 'method setidentifier($)' => 'setidentifier'
my @parts = split /\s+/, $block_type;
$name = $parts[1];
$name =~ s/\(.*$//;
}
elsif (
$block_type =~ /^(package|class)\b/
&& ( $dump_all_types
|| $rdump_block_types->{'package'}
|| $rdump_block_types->{'class'} )
)
{
$type = 'class';
my @parts = split /\s+/, $block_type;
$name = $parts[1];
$name =~ s/\(.*$//;
}
elsif (
$is_loop_type{$block_type}
&& ( $dump_all_types
|| $rdump_block_types->{$block_type}
|| $rdump_block_types->{ $block_type . $inner_loop_plus }
|| $rdump_block_types->{$inner_loop_plus} )
)
{
$type = $block_type . $inner_loop_plus;
}
elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) {
if ( $is_loop_type{$block_type} ) {
$name = $self->find_loop_label($seqno);
}
$type = $block_type;
}
else {
next;
}
push @selected_blocks,
{
K_opening => $K_opening,
K_closing => $K_closing,
line_start => $lx_open + 1,
name => $name,
type => $type,
level => $level,
max_change => $max_change,
block_count => $block_count,
};
} ## END loop to get info for selected blocks
return \@selected_blocks;
} ## end sub find_selected_blocks
sub dump_block_summary {
my ($self) = @_;
# Dump information about selected code blocks to STDOUT
# This sub is called when
# --dump-block-summary (-dbs) is set.
# The following controls are available:
# --dump-block-types=s (-dbt=s), where s is a list of block types
# (if else elsif for foreach while do ... sub) ; default is 'sub'
# --dump-block-minimum-lines=n (-dbml=n), where n is the minimum
# number of lines for a block to be included; default is 20.
my $rOpts_dump_block_types = $rOpts->{'dump-block-types'};
if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' }
$rOpts_dump_block_types =~ s/^\s+//;
$rOpts_dump_block_types =~ s/\s+$//;
my @list = split /\s+/, $rOpts_dump_block_types;
my %dump_block_types;
@{dump_block_types}{@list} = (1) x scalar(@list);
# Get block info
my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types );
# Get package info
my $rpackage_list = $self->find_selected_packages( \%dump_block_types );
return if ( !@{$rselected_blocks} && !@{$rpackage_list} );
my $input_stream_name = get_input_stream_name();
# Get code line count
my $rcode_line_count = $self->find_code_line_count();
# Get mccabe count
my $rmccabe_count_sum = $self->find_mccabe_count();
my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'};
if ( !defined($rOpts_dump_block_minimum_lines) ) {
$rOpts_dump_block_minimum_lines = 20;
}
my $rLL = $self->[_rLL_];
# merge blocks and packages, add various counts, filter and print to STDOUT
my $routput_lines = [];
foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) {
my $K_opening = $item->{K_opening};
my $K_closing = $item->{K_closing};
# define total number of lines
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_];
my $line_count = $lx_close - $lx_open + 1;
# define total number of lines of code excluding blanks, comments, pod
my $code_lines_open = $rcode_line_count->[$lx_open];
my $code_lines_close = $rcode_line_count->[$lx_close];
my $code_lines = 0;
if ( defined($code_lines_open) && defined($code_lines_close) ) {
$code_lines = $code_lines_close - $code_lines_open + 1;
}
# filter out blocks below the selected code line limit
if ( $code_lines < $rOpts_dump_block_minimum_lines ) {
next;
}
# add mccabe_count for this block
my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 };
my $mccabe_opening = $rmccabe_count_sum->{$K_opening};
my $mccabe_count = 1; # add 1 to match Perl::Critic
if ( defined($mccabe_opening) && defined($mccabe_closing) ) {
$mccabe_count += $mccabe_closing - $mccabe_opening;
}
# Store the final set of print variables
push @{$routput_lines}, [
$input_stream_name,
$item->{line_start},
$line_count,
$code_lines,
$item->{type},
$item->{name},
$item->{level},
$item->{max_change},
$item->{block_count},
$mccabe_count,
];
}
return unless @{$routput_lines};
# Sort blocks and packages on starting line number
my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines};
print {*STDOUT}
"file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n";
foreach my $rline_vars (@sorted_lines) {
my $line = join( ",", @{$rline_vars} ) . "\n";
print {*STDOUT} $line;
}
return;
} ## end sub dump_block_summary
sub set_ci {
my ($self) = @_;
# Set the basic continuation indentation (ci) for all tokens.
# This is a replacement for the values previously computed in
# sub Perl::Tidy::Tokenizer::tokenizer_wrapup. In most cases it
# produces identical results, but in a few cases it is an improvement.
use constant DEBUG_SET_CI => 0;
# This turns on an optional piece of logic which makes the new and
# old computations of ci agree. It has almost no effect on actual
# programs but is useful for testing.
use constant SET_CI_OPTION_0 => 1;
# This is slightly different from the hash in in break_lists
# with a similar name (removed '?' and ':' to fix t007 and others)
my %is_logical_container_for_ci;
my @q = qw# if elsif unless while and or err not && | || ! #;
@is_logical_container_for_ci{@q} = (1) x scalar(@q);
# This is slightly different from a tokenizer hash with a similar name:
my %is_container_label_type_for_ci;
@q = qw# k && | || ? : ! #;
@is_container_label_type_for_ci{@q} = (1) x scalar(@q);
# Undo ci of closing list paren followed by these binary operators:
# - initially defined for issue t027, then
# - added '=' for t015
# - added '=~' for 'locale.in'
# - added '<=>' for 'corelist.in'
# Note:
# See @value_requestor_type for more that might be included
# See also @is_binary_type
my %bin_op_type;
@q = qw# . ** -> + - / * = != ^ < > % >= <= =~ !~ <=> x #;
@bin_op_type{@q} = (1) x scalar(@q);
my %is_list_end_type;
@q = qw( ; { } );
push @q, ',';
@is_list_end_type{@q} = (1) x scalar(@q);
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
return unless defined($Klimit);
my $token = ';';
my $type = ';';
my $last_token = $token;
my $last_type = $type;
my $ci_last = 0;
my $ci_next = 0;
my $ci_next_next = 1;
my $rstack = [];
my $seq_root = SEQ_ROOT;
my $rparent = {
_seqno => $seq_root,
_ci_open => 0,
_ci_open_next => 0,
_ci_close => 0,
_ci_close_next => 0,
_container_type => 'Block',
_ci_next_next => $ci_next_next,
_comma_count => 0,
_semicolon_count => 0,
_Kc => undef,
};
# Debug stuff
my @debug_lines;
my %saw_ci_diff;
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_sub_block = $self->[_ris_sub_block_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $K_opening_ternary = $self->[_K_opening_ternary_];
my $K_closing_ternary = $self->[_K_closing_ternary_];
my $rlines = $self->[_rlines_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $want_break_before_comma = $want_break_before{','};
my $map_block_follows = sub {
# return true if a sort/map/etc block follows the closing brace
# of container $seqno
my ($seqno) = @_;
my $Kc = $K_closing_container->{$seqno};
return unless defined($Kc);
my $Kcn = $self->K_next_code($Kc);
return unless defined($Kcn);
my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
#return if ( defined($seqno_n) );
return if ($seqno_n);
my $Knn = $self->K_next_code($Kcn);
return unless defined($Knn);
my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_];
return unless ($seqno_nn);
my $K_nno = $K_opening_container->{$seqno_nn};
return unless $K_nno && $K_nno == $Knn;
my $block_type = $rblock_type_of_seqno->{$seqno_nn};
if ($block_type) {
return $is_block_with_ci{$block_type};
}
return;
};
my $redo_preceding_comment_ci = sub {
# We need to reset the ci of the previous comment(s)
my ( $K, $ci ) = @_;
my $Km = $self->K_previous_code($K);
return if ( !defined($Km) );
foreach my $Kt ( $Km + 1 .. $K - 1 ) {
if ( $rLL->[$Kt]->[_TYPE_] eq '#' ) {
$rLL->[$Kt]->[_CI_LEVEL_] = $ci;
}
}
return;
};
# Definitions of the sequence of ci_values being maintained:
# $ci_last = the ci value of the previous non-blank, non-comment token
# $ci_this = the ci value to be stored for this token at index $KK
# $ci_next = the normal ci for the next token, set by the previous tok
# $ci_next_next = the normal next value of $ci_next in this container
#--------------------------
# Main loop over all tokens
#--------------------------
my $KK = -1;
foreach my $rtoken_K ( @{$rLL} ) {
$KK++;
$type = $rtoken_K->[_TYPE_];
#------------------
# Section 1. Blanks
#------------------
if ( $type eq 'b' ) {
$rtoken_K->[_CI_LEVEL_] = $ci_next;
# 'next' to avoid saving last_ values for blanks and commas
next;
}
#--------------------
# Section 2. Comments
#--------------------
if ( $type eq '#' ) {
my $ci_this = $ci_next;
# If at '#' in ternary before a ? or :, use that level to make
# the comment line up with the next ? or : line. (see c202/t052)
# i.e. if a nested ? follows, we increase the '#' level by 1, and
# if a nested : follows, we decrease the '#' level by 1.
# This is the only place where this sub changes a _LEVEL_ value.
my $Kn;
my $parent_container_type = $rparent->{_container_type};
if ( $parent_container_type eq 'Ternary' ) {
$Kn = $self->K_next_code($KK);
if ($Kn) {
my $type_kn = $rLL->[$Kn]->[_TYPE_];
if ( $is_ternary{$type_kn} ) {
my $level_KK = $rLL->[$KK]->[_LEVEL_];
my $level_Kn = $rLL->[$Kn]->[_LEVEL_];
$rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_];
# and use the ci of a terminating ':'
if ( $Kn == $rparent->{_Kc} ) {
$ci_this = $rparent->{_ci_close};
}
}
}
}
# Undo ci for a block comment followed by a closing token or , or ;
# provided that the parent container:
# - ends without ci, or
# - starts ci=0 and is a comma list or this follows a closing type
# - has a level jump
if (
$ci_this
&& (
!$rparent->{_ci_close}
|| (
!$rparent->{_ci_open_next}
&& ( ( $rparent->{_comma_count} || $last_type eq ',' )
|| $is_closing_type{$last_type} )
)
)
)
{
# Be sure this is a block comment
my $lx = $rtoken_K->[_LINE_INDEX_];
my $rK_range = $rlines->[$lx]->{_rK_range};
my $Kfirst;
if ($rK_range) { $Kfirst = $rK_range->[0] }
if ( defined($Kfirst) && $Kfirst == $KK ) {
# Look for trailing closing token
# [ and possibly ',' or ';' ]
$Kn = $self->K_next_code($KK) if ( !$Kn );
my $Kc = $rparent->{_Kc};
if (
$Kn
&& $Kc
&& (
$Kn == $Kc
# only look for comma if -wbb=',' is set
# to minimize changes to existing formatting
|| ( $rLL->[$Kn]->[_TYPE_] eq ','
&& $want_break_before_comma
&& $parent_container_type eq 'List' )
# do not look ahead for a bare ';' because
# it changes old formatting with little benefit.
## || ( $rLL->[$Kn]->[_TYPE_] eq ';'
## && $parent_container_type eq 'Block' )
)
)
{
# Be sure container has a level jump
my $level_KK = $rLL->[$KK]->[_LEVEL_];
my $level_Kc = $rLL->[$Kc]->[_LEVEL_];
if ( $level_Kc < $level_KK ) {
$ci_this = 0;
}
}
}
}
$ci_next = $ci_this;
$rtoken_K->[_CI_LEVEL_] = $ci_this;
# 'next' to avoid saving last_ values for blanks and commas
next;
}
#------------------------------------------------------------
# Section 3. Continuing with non-blank and non-comment tokens
#------------------------------------------------------------
$token = $rtoken_K->[_TOKEN_];
# Set ci values appropriate for most tokens:
my $ci_this = $ci_next;
$ci_next = $ci_next_next;
# Now change these ci values as necessary for special cases...
#----------------------------
# Section 4. Container tokens
#----------------------------
if ( $rtoken_K->[_TYPE_SEQUENCE_] ) {
my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
#-------------------------------------
# Section 4.1 Opening container tokens
#-------------------------------------
if ( $is_opening_sequence_token{$token} ) {
my $level = $rtoken_K->[_LEVEL_];
# Default ci values for the closing token, to be modified
# as necessary:
my $ci_close = $ci_next;
my $ci_close_next = $ci_next_next;
my $Kc =
$type eq '?'
? $K_closing_ternary->{$seqno}
: $K_closing_container->{$seqno};
# $Kn = $self->K_next_nonblank($KK);
my $Kn;
if ( $KK < $Klimit ) {
$Kn = $KK + 1;
if ( $rLL->[$Kn]->[_TYPE_] eq 'b' && $Kn < $Klimit ) {
$Kn += 1;
}
}
# $Kcn = $self->K_next_code($Kc);
my $Kcn;
if ( $Kc && $Kc < $Klimit ) {
$Kcn = $Kc + 1;
if ( $rLL->[$Kcn]->[_TYPE_] eq 'b' && $Kcn < $Klimit ) {
$Kcn += 1;
}
if ( $rLL->[$Kcn]->[_TYPE_] eq '#' ) {
$Kcn = $self->K_next_code($Kcn);
}
}
my $opening_level_jump =
$Kn ? $rLL->[$Kn]->[_LEVEL_] - $level : 0;
# initialize ci_next_next to its standard value
$ci_next_next = 1;
# Default: ci of first item of list with level jump is same as
# ci of first item of container
if ( $opening_level_jump > 0 ) {
$ci_next = $rparent->{_ci_open_next};
}
my ( $comma_count, $semicolon_count );
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ($rtype_count) {
$comma_count = $rtype_count->{','};
$semicolon_count = $rtype_count->{';'};
# Do not include a terminal semicolon in the count (the
# comma_count has already been corrected by respace_tokens)
# We only need to know if there are semicolons or not, so
# for speed we can just do this test if the count is 1.
if ( $semicolon_count && $semicolon_count == 1 ) {
my $Kcm = $self->K_previous_code($Kc);
if ( $rLL->[$Kcm]->[_TYPE_] eq ';' ) {
$semicolon_count--;
}
}
}
my $container_type;
#-------------------------
# Section 4.1.1 Code Block
#-------------------------
my $block_type = $rblock_type_of_seqno->{$seqno};
if ($block_type) {
$container_type = 'Block';
# set default depending on block type
$ci_close = 0;
my $no_semicolon =
$is_block_without_semicolon{$block_type}
|| $ris_sub_block->{$seqno}
|| $last_type eq 'J';
if ( !$no_semicolon ) {
# Optional fix for block types sort/map/etc which use
# zero ci at terminal brace if previous keyword had
# zero ci. This will cause sort/map/grep filter blocks
# to line up. Note that sub 'undo_ci' will also try to
# do this, so this is not a critical operation.
if ( $is_block_with_ci{$block_type} ) {
my $parent_seqno = $rparent->{_seqno};
my $rtype_count_p =
$rtype_count_by_seqno->{$parent_seqno};
if (
# only do this within containers
$parent_seqno != SEQ_ROOT
# only in containers without ',' and ';'
&& !$rparent->{_comma_count}
&& !$rparent->{_semicolon_count}
&& $map_block_follows->($seqno)
)
{
if ($ci_last) {
$ci_close = $ci_this;
}
}
else {
$ci_close = $ci_this;
}
}
# keep ci if certain operators follow (fix c202/t024)
if ( !$ci_close && $Kcn ) {
my $type_kcn = $rLL->[$Kcn]->[_TYPE_];
my $token_kcn = $rLL->[$Kcn]->[_TOKEN_];
if ( $type_kcn =~ /^(\.|\&\&|\|\|)$/
|| $type_kcn eq 'k' && $is_and_or{$token_kcn} )
{
$ci_close = $ci_this;
}
}
}
if ( $rparent->{_container_type} ne 'Ternary' ) {
$ci_this = 0;
}
$ci_next = 0;
$ci_close_next = $ci_close;
}
#----------------------
# Section 4.1.2 Ternary
#----------------------
elsif ( $type eq '?' ) {
$container_type = 'Ternary';
if ( $rparent->{_container_type} eq 'List'
&& !$rparent->{_ci_open_next} )
{
$ci_this = 0;
$ci_close = 0;
}
# redo ci of any preceding comments if necessary
# at an outermost ? (which has no level jump)
if ( !$opening_level_jump ) {
$redo_preceding_comment_ci->( $KK, $ci_this );
}
}
#-------------------------------
# Section 4.1.3 Logical or List?
#-------------------------------
else {
my $is_logical = $is_container_label_type_for_ci{$last_type}
&& $is_logical_container_for_ci{$last_token}
# Part 1 of optional patch to get agreement with previous
# ci This makes almost no difference in a typical program
# because we will seldom break within an array index.
|| $type eq '[' && SET_CI_OPTION_0;
if ( !$is_logical && $token eq '(' ) {
# 'foreach' and 'for' paren contents are treated as
# logical except for C-style 'for'
if ( $last_type eq 'k' ) {
$is_logical ||= $last_token eq 'foreach';
# C-style 'for' container will be type 'List'
if ( $last_token eq 'for' ) {
$is_logical =
!( $rtype_count && $rtype_count->{'f'} );
}
}
# Check for 'for' and 'foreach' loops with iterators
elsif ( $last_type eq 'i' && defined($Kcn) ) {
my $seqno_kcn = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
my $type_kcn = $rLL->[$Kcn]->[_TOKEN_];
if ( $seqno_kcn && $type_kcn eq '{' ) {
my $block_type_kcn =
$rblock_type_of_seqno->{$seqno_kcn};
$is_logical ||= $block_type_kcn
&& ( $block_type_kcn eq 'for'
|| $block_type_kcn eq 'foreach' );
}
# Search backwards for 'for'/'foreach' with
# iterator in case user is running from an editor
# and did not include the block (fixes case
# 'xci.in').
my $Km = $self->K_previous_code($KK);
foreach ( 0 .. 2 ) {
$Km = $self->K_previous_code($Km);
last unless defined($Km);
last unless $rLL->[$Km]->[_TYPE_] eq 'k';
my $tok = $rLL->[$Km]->[_TOKEN_];
next if $tok eq 'my';
$is_logical ||=
( $tok eq 'for' || $tok eq 'foreach' );
last;
}
}
elsif ( $last_token eq '(' ) {
$is_logical ||=
$rparent->{_container_type} eq 'Logical';
}
else {
## ok - none of the above
}
}
#------------------------
# Section 4.1.3.1 Logical
#------------------------
if ($is_logical) {
$container_type = 'Logical';
# Pass ci though an '!'
if ( $last_type eq '!' ) { $ci_this = $ci_last }
$ci_next_next = 0;
$ci_close_next = $ci_this;
# Part 2 of optional patch to get agreement with
# previous ci
if ( $type eq '[' && SET_CI_OPTION_0 ) {
$ci_next_next = $ci_this;
# Undo ci at a chain of indexes or hash keys
if ( $last_type eq '}' ) {
$ci_this = $ci_last;
}
}
if ($opening_level_jump) {
$ci_next = 0;
}
}
#---------------------
# Section 4.1.3.2 List
#---------------------
else {
# Here 'List' is a catchall for none of the above types
$container_type = 'List';
# lists in blocks ...
if ( $rparent->{_container_type} eq 'Block' ) {
# undo ci if another closing token follows
if ( defined($Kcn) ) {
my $closing_level_jump =
$rLL->[$Kcn]->[_LEVEL_] - $level;
if ( $closing_level_jump < 0 ) {
$ci_close = $ci_this;
}
}
}
# lists not in blocks ...
else {
if ( !$rparent->{_comma_count} ) {
$ci_close = $ci_this;
# undo ci at binary op after right paren if no
# commas in container; fixes t027, t028
if ( $ci_close_next != $ci_close
&& defined($Kcn)
&& $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } )
{
$ci_close_next = $ci_close;
}
}
if ( $rparent->{_container_type} eq 'Ternary' ) {
$ci_next = 0;
}
}
# Undo ci at a chain of indexes or hash keys
if ( $token ne '(' && $last_type eq '}' ) {
$ci_this = $ci_close = $ci_last;
}
}
}
#---------------------------------------
# Section 4.1.4 Store opening token info
#---------------------------------------
# Most closing tokens should align with their opening tokens.
if (
$type eq '{'
&& $token ne '('
&& $is_list_end_type{$last_type}
# avoid asub blocks, which may have prototypes ending in '}'
&& !$ris_asub_block->{$seqno}
)
{
$ci_close = $ci_this;
}
# Closing ci must never be less than opening
if ( $ci_close < $ci_this ) { $ci_close = $ci_this }
push @{$rstack}, $rparent;
$rparent = {
_seqno => $seqno,
_container_type => $container_type,
_ci_next_next => $ci_next_next,
_ci_open => $ci_this,
_ci_open_next => $ci_next,
_ci_close => $ci_close,
_ci_close_next => $ci_close_next,
_comma_count => $comma_count,
_semicolon_count => $semicolon_count,
_Kc => $Kc,
};
}
#-------------------------------------
# Section 4.2 Closing container tokens
#-------------------------------------
else {
my $seqno_test = $rparent->{_seqno};
if ( $seqno_test ne $seqno ) {
# Shouldn't happen if we are processing balanced text.
# (Unbalanced text should go out verbatim)
DEVEL_MODE
&& Fault("stack error: $seqno_test != $seqno\n");
}
# Use ci_this, ci_next values set by the matching opening token:
$ci_this = $rparent->{_ci_close};
$ci_next = $rparent->{_ci_close_next};
my $ci_open_old = $rparent->{_ci_open};
# Then pop the stack and use the parent ci_next_next value:
if ( @{$rstack} ) {
$rparent = pop @{$rstack};
$ci_next_next = $rparent->{_ci_next_next};
}
else {
# Shouldn't happen if we are processing balanced text.
DEVEL_MODE && Fault("empty stack - shouldn't happen\n");
}
# Fix: undo ci at a closing token followed by a closing token.
# Goal is to keep formatting independent of the existence of a
# trailing comma or semicolon.
if ( $ci_this > 0 && !$ci_open_old && !$rparent->{_ci_close} ) {
my $Kc = $rparent->{_Kc};
my $Kn = $self->K_next_code($KK);
if ( $Kc && $Kn && $Kc == $Kn ) {
$ci_this = $ci_next = 0;
}
}
}
}
#---------------------------------
# Section 5. Semicolons and Labels
#---------------------------------
# The next token after a ';' and label (type 'J') starts a new stmt
# The ci after a C-style for ';' (type 'f') is handled similarly.
elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) {
$ci_next = 0;
if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last }
}
#--------------------
# Section 6. Keywords
#--------------------
# Undo ci after a format statement
elsif ( $type eq 'k' ) {
if ( substr( $token, 0, 6 ) eq 'format' ) {
$ci_next = 0;
}
}
#------------------
# Section 7. Commas
#------------------
# A comma and the subsequent item normally have ci undone
# unless ci has been set at a lower level
elsif ( $type eq ',' ) {
if ( $rparent->{_container_type} eq 'List' ) {
$ci_this = $ci_next = $rparent->{_ci_open_next};
}
}
#---------------------------------
# Section 8. Hanging side comments
#---------------------------------
# Treat hanging side comments like blanks
elsif ( $type eq 'q' && $token eq EMPTY_STRING ) {
$ci_next = $ci_this;
$rtoken_K->[_CI_LEVEL_] = $ci_this;
# 'next' to avoid saving last_ values for blanks and commas
next;
}
else {
## ok - not a special type for ci
}
# Save debug info if requested
DEBUG_SET_CI && do {
my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
my $level = $rtoken_K->[_LEVEL_];
my $ci = $rtoken_K->[_CI_LEVEL_];
if ( $ci > 1 ) { $ci = 1 }
my $tok = $token;
my $last_tok = $last_token;
$tok =~ s/\t//g;
$last_tok =~ s/\t//g;
$tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok;
$last_tok =
length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok;
$tok =~ s/["']//g;
$last_tok =~ s/["']//g;
my $block_type;
$block_type = $rblock_type_of_seqno->{$seqno} if ($seqno);
$block_type = EMPTY_STRING unless ($block_type);
my $ptype = $rparent->{_container_type};
my $pname = $ptype;
my $error =
$ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR";
if ($error) { $saw_ci_diff{$KK} = 1 }
my $lno = $rtoken_K->[_LINE_INDEX_] + 1;
$debug_lines[$KK] = <<EOM;
$lno\t$ci\t$ci_this\t$ci_next\t$last_type\t$last_tok\t$type\t$tok\t$seqno\t$level\t$pname\t$block_type\t$error
EOM
};
#----------------------------------
# Store the ci value for this token
#----------------------------------
$rtoken_K->[_CI_LEVEL_] = $ci_this;
# Remember last nonblank, non-comment token info for the next pass
$ci_last = $ci_this;
$last_token = $token;
$last_type = $type;
} ## End main loop over tokens
#----------------------
# Post-loop operations:
#----------------------
# if the logfile is saved, we need to save the leading ci of
# each old line of code.
if ( $self->[_save_logfile_] ) {
foreach my $line_of_tokens ( @{$rlines} ) {
my $line_type = $line_of_tokens->{_line_type};
next if ( $line_type ne 'CODE' );
my ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
next if ( !defined($Kfirst) );
$line_of_tokens->{_ci_level_0} = $rLL->[$Kfirst]->[_CI_LEVEL_];
}
}
if (DEBUG_SET_CI) {
my @output_lines;
foreach my $KK ( 0 .. $Klimit ) {
my $line = $debug_lines[$KK];
if ($line) {
my $Kp = $self->K_previous_code($KK);
my $Kn = $self->K_next_code($KK);
if ( DEBUG_SET_CI > 1
|| $Kp && $saw_ci_diff{$Kp}
|| $saw_ci_diff{$KK}
|| $Kn && $saw_ci_diff{$Kn} )
{
push @output_lines, $line;
}
}
}
if (@output_lines) {
unshift @output_lines, <<EOM;
lno\tci\tci_this\tci_next\tlast_type\tlast_tok\ttype\ttok\tseqno\tlevel\tpname\tblock_type\terror?
EOM
foreach my $line (@output_lines) {
chomp $line;
print {*STDOUT} $line, "\n";
}
}
}
return;
} ## end sub set_ci
sub set_CODE_type {
my ($self) = @_;
# Examine each line of code and set a flag '$CODE_type' to describe it.
# Also return a list of lines with side comments.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
my $rOpts_static_block_comment_prefix =
$rOpts->{'static-block-comment-prefix'};
# Remember indexes of lines with side comments
my @ix_side_comments;
my $In_format_skipping_section = 0;
my $Saw_VERSION_in_this_file = 0;
my $has_side_comment = 0;
my $last_line_had_side_comment = 0;
my ( $Kfirst, $Klast );
my $CODE_type;
# Loop to set CODE_type
# Possible CODE_types
# 'VB' = Verbatim - line goes out verbatim (a quote)
# 'FS' = Format Skipping - line goes out verbatim
# 'BL' = Blank Line
# 'HSC' = Hanging Side Comment - fix this hanging side comment
# 'SBCX'= Static Block Comment Without Leading Space
# 'SBC' = Static Block Comment
# 'BC' = Block Comment - an ordinary full line comment
# 'IO' = Indent Only - line goes out unchanged except for indentation
# 'NIN' = No Internal Newlines - line does not get broken
# 'VER' = VERSION statement
# '' = ordinary line of code with no restrictions
my $ix_line = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$ix_line++;
my $line_type = $line_of_tokens->{_line_type};
my $last_CODE_type = $CODE_type;
$CODE_type = EMPTY_STRING;
if ( $line_type ne 'CODE' ) {
next;
}
my $input_line = $line_of_tokens->{_line_text};
my $Klast_prev = $Klast;
( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
my $is_block_comment;
if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
if ( $jmax == 0 ) { $is_block_comment = 1; }
else { $has_side_comment = 1 }
}
# Write line verbatim if we are in a formatting skip section
if ($In_format_skipping_section) {
# Note: extra space appended to comment simplifies pattern matching
if (
$is_block_comment
# optional fast pre-check
&& ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
|| $rOpts_format_skipping_end )
&& ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
/$format_skipping_pattern_end/
)
{
$In_format_skipping_section = 0;
my $input_line_no = $line_of_tokens->{_line_number};
write_logfile_entry(
"Line $input_line_no: Exiting format-skipping section\n");
}
elsif (
$is_block_comment
# optional fast pre-check
&& ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
|| $rOpts_format_skipping_begin )
&& $rOpts_format_skipping
&& ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
/$format_skipping_pattern_begin/
)
{
# warn of duplicate starting comment lines, git #118
my $input_line_no = $line_of_tokens->{_line_number};
warning(
"Already in format-skipping section which started at line $In_format_skipping_section\n",
$input_line_no
);
}
else {
## ok - not at a format skipping control line
}
$CODE_type = 'FS';
next;
}
# Check for a continued quote..
if ( $line_of_tokens->{_starting_in_quote} ) {
# A line which is entirely a quote or pattern must go out
# verbatim. Note: the \n is contained in $input_line.
if ( $jmax <= 0 ) {
if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
my $input_line_number = $line_of_tokens->{_line_number};
$self->note_embedded_tab($input_line_number);
}
$CODE_type = 'VB';
next;
}
}
# See if we are entering a formatting skip section
if (
$is_block_comment
# optional fast pre-check
&& ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
|| $rOpts_format_skipping_begin )
&& $rOpts_format_skipping
&& ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
/$format_skipping_pattern_begin/
)
{
my $input_line_no = $line_of_tokens->{_line_number};
$In_format_skipping_section = $input_line_no;
write_logfile_entry(
"Line $input_line_no: Entering format-skipping section\n");
$CODE_type = 'FS';
next;
}
# ignore trailing blank tokens (they will get deleted later)
if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
$jmax--;
}
# blank line..
if ( $jmax < 0 ) {
$CODE_type = 'BL';
next;
}
# Handle comments
if ($is_block_comment) {
# see if this is a static block comment (starts with ## by default)
my $is_static_block_comment = 0;
my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
if (
# optional fast pre-check
(
substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
|| $rOpts_static_block_comment_prefix
)
&& $rOpts_static_block_comments
&& $input_line =~ /$static_block_comment_pattern/
)
{
$is_static_block_comment = 1;
}
# Check for comments which are line directives
# Treat exactly as static block comments without leading space
# reference: perlsyn, near end, section Plain Old Comments (Not!)
# example: '# line 42 "new_filename.plx"'
if (
$no_leading_space
&& $input_line =~ m{^\# \s*
line \s+ (\d+) \s*
(?:\s("?)([^"]+)\2)? \s*
$}x
)
{
$is_static_block_comment = 1;
}
# look for hanging side comment ...
if (
$last_line_had_side_comment # this follows as side comment
&& !$no_leading_space # with some leading space, and
&& !$is_static_block_comment # this is not a static comment
)
{
# continuing an existing HSC chain?
if ( $last_CODE_type eq 'HSC' ) {
$has_side_comment = 1;
$CODE_type = 'HSC';
next;
}
# starting a new HSC chain?
if (
$rOpts->{'hanging-side-comments'} # user is allowing
# hanging side comments
# like this
&& ( defined($Klast_prev) && $Klast_prev > 1 )
# and the previous side comment was not static (issue c070)
&& !(
$rOpts->{'static-side-comments'}
&& $rLL->[$Klast_prev]->[_TOKEN_] =~
/$static_side_comment_pattern/
)
)
{
# and it is not a closing side comment (issue c070).
my $K_penult = $Klast_prev - 1;
$K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
my $follows_csc =
( $rLL->[$K_penult]->[_TOKEN_] eq '}'
&& $rLL->[$K_penult]->[_TYPE_] eq '}'
&& $rLL->[$Klast_prev]->[_TOKEN_] =~
/$closing_side_comment_prefix_pattern/ );
if ( !$follows_csc ) {
$has_side_comment = 1;
$CODE_type = 'HSC';
next;
}
}
}
if ($is_static_block_comment) {
$CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
next;
}
elsif ($last_line_had_side_comment
&& !$rOpts_maximum_consecutive_blank_lines
&& $rLL->[$Kfirst]->[_LEVEL_] > 0 )
{
# Emergency fix to keep a block comment from becoming a hanging
# side comment. This fix is for the case that blank lines
# cannot be inserted. There is related code in sub
# 'process_line_of_CODE'
$CODE_type = 'SBCX';
next;
}
else {
$CODE_type = 'BC';
next;
}
}
# End of comments. Handle a line of normal code:
if ($rOpts_indent_only) {
$CODE_type = 'IO';
next;
}
if ( !$rOpts_add_newlines ) {
$CODE_type = 'NIN';
next;
}
# Patch needed for MakeMaker. Do not break a statement
# in which $VERSION may be calculated. See MakeMaker.pm;
# this is based on the coding in it.
# The first line of a file that matches this will be eval'd:
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
# Examples:
# *VERSION = \'1.01';
# ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
# it unless -npvl is used.
# Patch for problem reported in RT #81866, where files
# had been flattened into a single line and couldn't be
# tidied without -npvl. There are two parts to this patch:
# First, it is not done for a really long line (80 tokens for now).
# Second, we will only allow up to one semicolon
# before the VERSION. We need to allow at least one semicolon
# for statements like this:
# require Exporter; our $VERSION = $Exporter::VERSION;
# where both statements must be on a single line for MakeMaker
if ( !$Saw_VERSION_in_this_file
&& $jmax < 80
&& $input_line =~
/^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
{
$Saw_VERSION_in_this_file = 1;
write_logfile_entry("passing VERSION line; -npvl deactivates\n");
# This code type has lower priority than others
$CODE_type = 'VER';
next;
}
}
continue {
$line_of_tokens->{_code_type} = $CODE_type;
$last_line_had_side_comment = $has_side_comment;
if ($has_side_comment) {
push @ix_side_comments, $ix_line;
$has_side_comment = 0;
}
}
return \@ix_side_comments;
} ## end sub set_CODE_type
sub find_non_indenting_braces {
my ( $self, $rix_side_comments ) = @_;
# Find and mark all non-indenting braces in this file.
# Given:
# $rix_side_comments = index of lines which have side comments
# Find and save the line indexes of these special side comments in:
# $self->[_rseqno_non_indenting_brace_by_ix_];
# Non-indenting braces are opening braces of the form
# { #<<< ...
# which do not cause an increase in indentation level.
# They are enabled with the --non-indenting-braces, or -nib, flag.
return unless ( $rOpts->{'non-indenting-braces'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rseqno_non_indenting_brace_by_ix =
$self->[_rseqno_non_indenting_brace_by_ix_];
foreach my $ix ( @{$rix_side_comments} ) {
my $line_of_tokens = $rlines->[$ix];
my $line_type = $line_of_tokens->{_line_type};
if ( $line_type ne 'CODE' ) {
# shouldn't happen
DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
next;
}
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
# shouldn't happen
DEVEL_MODE && Fault("did not get a comment\n");
next;
}
next if ( $Klast <= $Kfirst ); # maybe HSC
my $token_sc = $rLL->[$Klast]->[_TOKEN_];
my $K_m = $Klast - 1;
my $type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > $Kfirst ) {
$K_m--;
$type_m = $rLL->[$K_m]->[_TYPE_];
}
my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
if ($seqno_m) {
my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
# The pattern ends in \s but we have removed the newline, so
# we added it back for the match. That way we require an exact
# match to the special string and also allow additional text.
$token_sc .= "\n";
if ( $block_type_m
&& $is_opening_type{$type_m}
&& $token_sc =~ /$non_indenting_brace_pattern/ )
{
$rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
}
}
}
return;
} ## end sub find_non_indenting_braces
sub delete_side_comments {
my ( $self, $rix_side_comments ) = @_;
# Given a list of indexes of lines with side comments, handle any
# requested side comment deletions.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rseqno_non_indenting_brace_by_ix =
$self->[_rseqno_non_indenting_brace_by_ix_];
foreach my $ix ( @{$rix_side_comments} ) {
my $line_of_tokens = $rlines->[$ix];
my $line_type = $line_of_tokens->{_line_type};
# This fault shouldn't happen because we only saved CODE lines with
# side comments in the TASK 1 loop above.
if ( $line_type ne 'CODE' ) {
if (DEVEL_MODE) {
my $lno = $ix + 1;
Fault(<<EOM);
Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
EOM
}
next;
}
my $CODE_type = $line_of_tokens->{_code_type};
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
if (DEVEL_MODE) {
my $lno = $ix + 1;
Fault(<<EOM);
Did not find side comment near line $lno while deleting side comments
EOM
}
next;
}
my $delete_side_comment =
$rOpts_delete_side_comments
&& ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
&& (!$CODE_type
|| $CODE_type eq 'HSC'
|| $CODE_type eq 'IO'
|| $CODE_type eq 'NIN' );
# Do not delete special control side comments
if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
$delete_side_comment = 0;
}
if (
$rOpts_delete_closing_side_comments
&& !$delete_side_comment
&& $Klast > $Kfirst
&& ( !$CODE_type
|| $CODE_type eq 'HSC'
|| $CODE_type eq 'IO'
|| $CODE_type eq 'NIN' )
)
{
my $token = $rLL->[$Klast]->[_TOKEN_];
my $K_m = $Klast - 1;
my $type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
if ($seqno_m) {
my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
if ( $block_type_m
&& $token =~ /$closing_side_comment_prefix_pattern/
&& $block_type_m =~ /$closing_side_comment_list_pattern/ )
{
$delete_side_comment = 1;
}
}
} ## end if ( $rOpts_delete_closing_side_comments...)
if ($delete_side_comment) {
# We are actually just changing the side comment to a blank.
# This may produce multiple blanks in a row, but sub respace_tokens
# will check for this and fix it.
$rLL->[$Klast]->[_TYPE_] = 'b';
$rLL->[$Klast]->[_TOKEN_] = SPACE;
# The -io option outputs the line text, so we have to update
# the line text so that the comment does not reappear.
if ( $CODE_type eq 'IO' ) {
my $line = EMPTY_STRING;
foreach my $KK ( $Kfirst .. $Klast - 1 ) {
$line .= $rLL->[$KK]->[_TOKEN_];
}
$line =~ s/\s+$//;
$line_of_tokens->{_line_text} = $line . "\n";
}
# If we delete a hanging side comment the line becomes blank.
if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
}
}
return;
} ## end sub delete_side_comments
sub dump_verbatim {
my $self = shift;
# Dump the input file to the output verbatim. This is called when
# there is a severe error and formatted output cannot be made.
my $rlines = $self->[_rlines_];
foreach my $line ( @{$rlines} ) {
my $input_line = $line->{_line_text};
$self->write_unindented_line($input_line);
}
return;
} ## end sub dump_verbatim
my %wU;
my %wiq;
my %is_witPS;
my %is_sigil;
my %is_nonlist_keyword;
my %is_nonlist_type;
my %is_s_y_m_slash;
my %is_unexpected_equals;
my %is_ascii_type;
BEGIN {
# added 'U' to fix cases b1125 b1126 b1127
my @q = qw(w U);
@{wU}{@q} = (1) x scalar(@q);
@q = qw(w i q Q G C Z);
@{wiq}{@q} = (1) x scalar(@q);
@q = qw(w i t P S); # Fix for c250: added new types 'P', 'S', formerly 'i'
@{is_witPS}{@q} = (1) x scalar(@q);
@q = qw($ & % * @);
@{is_sigil}{@q} = (1) x scalar(@q);
# Parens following these keywords will not be marked as lists. Note that
# 'for' is not included and is handled separately, by including 'f' in the
# hash %is_counted_type, since it may or may not be a c-style for loop.
@q = qw( if elsif unless and or );
@is_nonlist_keyword{@q} = (1) x scalar(@q);
# Parens following these types will not be marked as lists
@q = qw( && || );
@is_nonlist_type{@q} = (1) x scalar(@q);
@q = qw( s y m / );
@is_s_y_m_slash{@q} = (1) x scalar(@q);
@q = qw( = == != );
@is_unexpected_equals{@q} = (1) x scalar(@q);
# We can always skip expensive length_function->() calls for these
# ascii token types
@q = qw#
b k L R ; ( { [ ? : ] } ) f t n v F p m pp mm
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x=
... **= <<= >>= &&= ||= //= <=>
+ - / * | % ! x ~ = \ ? : . < > ^ &
#;
push @q, ',';
@is_ascii_type{@q} = (1) x scalar(@q);
} ## end BEGIN
{ #<<< begin closure respace_tokens
my $rLL_new; # This will be the new array of tokens
# These are variables in $self
my $rLL;
my $length_function;
my $K_closing_ternary;
my $K_opening_ternary;
my $rchildren_of_seqno;
my $rhas_broken_code_block;
my $rhas_broken_list;
my $rhas_broken_list_with_lec;
my $rhas_code_block;
my $rhas_list;
my $rhas_ternary;
my $ris_assigned_structure;
my $ris_broken_container;
my $ris_excluded_lp_container;
my $ris_list_by_seqno;
my $ris_permanently_broken;
my $rlec_count_by_seqno;
my $roverride_cab3;
my $rparent_of_seqno;
my $rtype_count_by_seqno;
my $rblock_type_of_seqno;
my $K_opening_container;
my $K_closing_container;
my %K_first_here_doc_by_seqno;
my $last_nonblank_code_type;
my $last_nonblank_code_token;
my $last_nonblank_block_type;
my $last_last_nonblank_code_type;
my $last_last_nonblank_code_token;
my %seqno_stack;
my %K_old_opening_by_seqno;
my $depth_next;
my $depth_next_max;
my $cumulative_length;
# Variables holding the current line info
my $Ktoken_vars;
my $Kfirst_old;
my $Klast_old;
my $Klast_old_code;
my $CODE_type;
my $rwhitespace_flags;
sub initialize_respace_tokens_closure {
my ($self) = @_;
$rLL_new = []; # This is the new array
$rLL = $self->[_rLL_];
$length_function = $self->[_length_function_];
$K_closing_ternary = $self->[_K_closing_ternary_];
$K_opening_ternary = $self->[_K_opening_ternary_];
$rchildren_of_seqno = $self->[_rchildren_of_seqno_];
$rhas_broken_code_block = $self->[_rhas_broken_code_block_];
$rhas_broken_list = $self->[_rhas_broken_list_];
$rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
$rhas_code_block = $self->[_rhas_code_block_];
$rhas_list = $self->[_rhas_list_];
$rhas_ternary = $self->[_rhas_ternary_];
$ris_assigned_structure = $self->[_ris_assigned_structure_];
$ris_broken_container = $self->[_ris_broken_container_];
$ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
$ris_list_by_seqno = $self->[_ris_list_by_seqno_];
$ris_permanently_broken = $self->[_ris_permanently_broken_];
$rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
$roverride_cab3 = $self->[_roverride_cab3_];
$rparent_of_seqno = $self->[_rparent_of_seqno_];
$rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
$rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
%K_first_here_doc_by_seqno = ();
$last_nonblank_code_type = ';';
$last_nonblank_code_token = ';';
$last_nonblank_block_type = EMPTY_STRING;
$last_last_nonblank_code_type = ';';
$last_last_nonblank_code_token = ';';
%seqno_stack = ();
%K_old_opening_by_seqno = (); # Note: old K index
$depth_next = 0;
$depth_next_max = 0;
# we will be setting token lengths as we go
$cumulative_length = 0;
$Ktoken_vars = undef; # the old K value of $rtoken_vars
$Kfirst_old = undef; # min K of old line
$Klast_old = undef; # max K of old line
$Klast_old_code = undef; # K of last token if side comment
$CODE_type = EMPTY_STRING;
# Set the whitespace flags, which indicate the token spacing preference.
$rwhitespace_flags = $self->set_whitespace_flags();
# Note that $K_opening_container and $K_closing_container have values
# defined in sub get_line() for the previous K indexes. They were needed
# in case option 'indent-only' was set, and we didn't get here. We no
# longer need those and will eliminate them now to avoid any possible
# mixing of old and new values. This must be done AFTER the call to
# set_whitespace_flags, which needs these.
$K_opening_container = $self->[_K_opening_container_] = {};
$K_closing_container = $self->[_K_closing_container_] = {};
return;
} ## end sub initialize_respace_tokens_closure
sub respace_tokens {
my $self = shift;
#--------------------------------------------------------------------------
# This routine is called once per file to do as much formatting as possible
# before new line breaks are set.
#--------------------------------------------------------------------------
# Return parameters:
# Set $severe_error=true if processing must terminate immediately
my ( $severe_error, $rqw_lines );
# We change any spaces in --indent-only mode
if ( $rOpts->{'indent-only'} ) {
# We need to define lengths for -indent-only to avoid undefs, even
# though these values are not actually needed for option --indent-only.
$rLL = $self->[_rLL_];
$cumulative_length = 0;
foreach my $item ( @{$rLL} ) {
my $token = $item->[_TOKEN_];
my $token_length =
$length_function ? $length_function->($token) : length($token);
$cumulative_length += $token_length;
$item->[_TOKEN_LENGTH_] = $token_length;
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
}
return ( $severe_error, $rqw_lines );
}
# This routine makes all necessary and possible changes to the tokenization
# after the initial tokenization of the file. This is a tedious routine,
# but basically it consists of inserting and deleting whitespace between
# nonblank tokens according to the selected parameters. In a few cases
# non-space characters are added, deleted or modified.
# The goal of this routine is to create a new token array which only needs
# the definition of new line breaks and padding to complete formatting. In
# a few cases we have to cheat a little to achieve this goal. In
# particular, we may not know if a semicolon will be needed, because it
# depends on how the line breaks go. To handle this, we include the
# semicolon as a 'phantom' which can be displayed as normal or as an empty
# string.
# Method: The old tokens are copied one-by-one, with changes, from the old
# linear storage array $rLL to a new array $rLL_new.
# (re-)initialize closure variables for this problem
$self->initialize_respace_tokens_closure();
#--------------------------------
# Main over all lines of the file
#--------------------------------
my $rlines = $self->[_rlines_];
my $line_type = EMPTY_STRING;
my $last_K_out;
foreach my $line_of_tokens ( @{$rlines} ) {
my $input_line_number = $line_of_tokens->{_line_number};
my $last_line_type = $line_type;
$line_type = $line_of_tokens->{_line_type};
next unless ( $line_type eq 'CODE' );
$CODE_type = $line_of_tokens->{_code_type};
if ( $CODE_type eq 'BL' ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$self->[_rblank_and_comment_count_]->{$seqno} += 1;
$self->set_permanently_broken($seqno)
if (!$ris_permanently_broken->{$seqno}
&& $rOpts_maximum_consecutive_blank_lines );
}
}
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless defined($Kfirst);
( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
$Klast_old_code = $Klast_old;
# Be sure an old K value is defined for sub store_token
$Ktoken_vars = $Kfirst;
# Check for correct sequence of token indexes...
# An error here means that sub write_line() did not correctly
# package the tokenized lines as it received them. If we
# get a fault here it has not output a continuous sequence
# of K values. Or a line of CODE may have been mis-marked as
# something else. There is no good way to continue after such an
# error.
if ( defined($last_K_out) ) {
if ( $Kfirst != $last_K_out + 1 ) {
Fault_Warn(
"Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
);
$severe_error = 1;
return ( $severe_error, $rqw_lines );
}
}
else {
# The first token should always have been given index 0 by sub
# write_line()
if ( $Kfirst != 0 ) {
Fault("Program Bug: first K is $Kfirst but should be 0");
}
}
$last_K_out = $Klast;
# Handle special lines of code
if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
# CODE_types are as follows.
# 'BL' = Blank Line
# 'VB' = Verbatim - line goes out verbatim
# 'FS' = Format Skipping - line goes out verbatim, no blanks
# 'IO' = Indent Only - only indentation may be changed
# 'NIN' = No Internal Newlines - line does not get broken
# 'HSC'=Hanging Side Comment - fix this hanging side comment
# 'BC'=Block Comment - an ordinary full line comment
# 'SBC'=Static Block Comment - a block comment which does not get
# indented
# 'SBCX'=Static Block Comment Without Leading Space
# 'VER'=VERSION statement
# '' or (undefined) - no restrictions
# For a hanging side comment we insert an empty quote before
# the comment so that it becomes a normal side comment and
# will be aligned by the vertical aligner
if ( $CODE_type eq 'HSC' ) {
# Safety Check: This must be a line with one token (a comment)
my $rvars_Kfirst = $rLL->[$Kfirst];
if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
# Note that even if the flag 'noadd-whitespace' is set, we
# will make an exception here and allow a blank to be
# inserted to push the comment to the right. We can think
# of this as an adjustment of indentation rather than
# whitespace between tokens. This will also prevent the
# hanging side comment from getting converted to a block
# comment if whitespace gets deleted, as for example with
# the -extrude and -mangle options.
my $rcopy =
copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
$self->store_token($rcopy);
$rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
$self->store_token($rcopy);
$self->store_token($rvars_Kfirst);
next;
}
else {
# This line was mis-marked by sub scan_comment. Catch in
# DEVEL_MODE, otherwise try to repair and keep going.
Fault(
"Program bug. A hanging side comment has been mismarked"
) if (DEVEL_MODE);
$CODE_type = EMPTY_STRING;
$line_of_tokens->{_code_type} = $CODE_type;
}
}
# Copy tokens unchanged
foreach my $KK ( $Kfirst .. $Klast ) {
$Ktoken_vars = $KK;
$self->store_token( $rLL->[$KK] );
}
next;
}
# Handle normal line..
# Define index of last token before any side comment for comma counts
my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
if ( ( $type_end eq '#' || $type_end eq 'b' )
&& $Klast_old_code > $Kfirst_old )
{
$Klast_old_code--;
if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
&& $Klast_old_code > $Kfirst_old )
{
$Klast_old_code--;
}
}
# Insert any essential whitespace between lines
# if last line was normal CODE.
# Patch for rt #125012: use K_previous_code rather than '_nonblank'
# because comments may disappear.
# Note that we must do this even if --noadd-whitespace is set
if ( $last_line_type eq 'CODE' ) {
my $type_next = $rLL->[$Kfirst]->[_TYPE_];
my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
if (
is_essential_whitespace(
$last_last_nonblank_code_token,
$last_last_nonblank_code_type,
$last_nonblank_code_token,
$last_nonblank_code_type,
$token_next,
$type_next,
)
)
{
$self->store_token();
}
}
#-----------------------------------------------
# Inner loop to respace tokens on a line of code
#-----------------------------------------------
# The inner loop is in a separate sub for clarity
$self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
} # End line loop
# finalize data structures
$self->respace_post_loop_ops();
# Reset memory to be the new array
$self->[_rLL_] = $rLL_new;
my $Klimit;
if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
$self->[_Klimit_] = $Klimit;
# During development, verify that the new array still looks okay.
DEVEL_MODE && $self->check_token_array();
# update the token limits of each line
( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
return ( $severe_error, $rqw_lines );
} ## end sub respace_tokens
sub respace_tokens_inner_loop {
my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
#-----------------------------------------------------------------
# Loop to copy all tokens on one line, making any spacing changes,
# while also collecting information needed by later subs.
#-----------------------------------------------------------------
foreach my $KK ( $Kfirst .. $Klast ) {
# TODO: consider eliminating this closure var by passing directly to
# store_token following pattern of store_token_to_go.
$Ktoken_vars = $KK;
my $rtoken_vars = $rLL->[$KK];
my $type = $rtoken_vars->[_TYPE_];
# Handle a blank space ...
if ( $type eq 'b' ) {
# Delete it if not wanted by whitespace rules
# or we are deleting all whitespace
# Note that whitespace flag is a flag indicating whether a
# white space BEFORE the token is needed
next if ( $KK >= $Klast ); # skip terminal blank
my $Knext = $KK + 1;
if ($rOpts_freeze_whitespace) {
$self->store_token($rtoken_vars);
next;
}
my $ws = $rwhitespace_flags->[$Knext];
if ( $ws == -1
|| $rOpts_delete_old_whitespace )
{
my $token_next = $rLL->[$Knext]->[_TOKEN_];
my $type_next = $rLL->[$Knext]->[_TYPE_];
my $do_not_delete = is_essential_whitespace(
$last_last_nonblank_code_token,
$last_last_nonblank_code_type,
$last_nonblank_code_token,
$last_nonblank_code_type,
$token_next,
$type_next,
);
# Note that repeated blanks will get filtered out here
next unless ($do_not_delete);
}
# make it just one character
$rtoken_vars->[_TOKEN_] = SPACE;
$self->store_token($rtoken_vars);
next;
}
my $token = $rtoken_vars->[_TOKEN_];
# Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
# One of ) ] } ...
if ( $is_closing_token{$token} ) {
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
my $block_type = $rblock_type_of_seqno->{$type_sequence};
#---------------------------------------------
# check for semicolon addition in a code block
#---------------------------------------------
if ($block_type) {
# if not preceded by a ';' ..
if ( $last_nonblank_code_type ne ';' ) {
# tentatively insert a semicolon if appropriate
$self->add_phantom_semicolon($KK)
if $rOpts->{'add-semicolons'};
}
}
#----------------------------------------------------------
# check for addition/deletion of a trailing comma in a list
#----------------------------------------------------------
else {
# if this is a list ..
my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
if ( $rtype_count
&& $rtype_count->{','}
&& !$rtype_count->{';'}
&& !$rtype_count->{'f'} )
{
# if NOT preceded by a comma..
if ( $last_nonblank_code_type ne ',' ) {
# insert a comma if requested
if ( $rOpts_add_trailing_commas
&& %trailing_comma_rules )
{
$self->add_trailing_comma( $KK, $Kfirst,
$trailing_comma_rules{$token} );
}
}
# if preceded by a comma ..
else {
# delete a trailing comma if requested
my $deleted;
if ( $rOpts_delete_trailing_commas
&& %trailing_comma_rules )
{
$deleted =
$self->delete_trailing_comma( $KK, $Kfirst,
$trailing_comma_rules{$token} );
}
# delete a weld-interfering comma if requested
if ( !$deleted
&& $rOpts_delete_weld_interfering_commas
&& $is_closing_type{
$last_last_nonblank_code_type} )
{
$self->delete_weld_interfering_comma($KK);
}
}
}
}
}
}
# Modify certain tokens here for whitespace
# The following is not yet done, but could be:
# sub (x x x)
# ( $type =~ /^[witPS]$/ )
elsif ( $is_witPS{$type} ) {
# index() is several times faster than a regex test with \s here
## $token =~ /\s/
if ( index( $token, SPACE ) > 0 || index( $token, "\t" ) > 0 ) {
# change '$ var' to '$var' etc
# change '@ ' to '@'
# Examples: <<snippets/space1.in>>
my $ord = ord( substr( $token, 1, 1 ) );
if (
# quick test for possible blank at second char
$ord > 0 && ( $ord < ORD_PRINTABLE_MIN
|| $ord > ORD_PRINTABLE_MAX )
)
{
my ( $sigil, $word ) = split /\s+/, $token, 2;
# $sigil =~ /^[\$\&\%\*\@]$/ )
if ( $is_sigil{$sigil} ) {
$token = $sigil;
$token .= $word if ( defined($word) ); # fix c104
$rtoken_vars->[_TOKEN_] = $token;
}
}
# trim identifiers of trailing blanks which can occur
# under some unusual circumstances, such as if the
# identifier 'witch' has trailing blanks on input here:
#
# sub
# witch
# () # prototype may be on new line ...
# ...
my $ord_ch = ord( substr( $token, -1, 1 ) );
if (
# quick check for possible ending space
$ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
|| $ord_ch > ORD_PRINTABLE_MAX )
)
{
$token =~ s/\s+$//g;
$rtoken_vars->[_TOKEN_] = $token;
}
# Fixed for c250 to use 'S' for sub definitions
if ( $type eq 'S' ) {
# -spp = 0 : no space before opening prototype paren
# -spp = 1 : stable (follow input spacing)
# -spp = 2 : always space before opening prototype paren
if ( !defined($rOpts_space_prototype_paren)
|| $rOpts_space_prototype_paren == 1 )
{
## default: stable
}
elsif ( $rOpts_space_prototype_paren == 0 ) {
$token =~ s/\s+\(/\(/;
}
elsif ( $rOpts_space_prototype_paren == 2 ) {
$token =~ s/\(/ (/;
}
else {
# bad n value for -spp=n
# just use the default
}
# one space max, and no tabs
$token =~ s/\s+/ /g;
$rtoken_vars->[_TOKEN_] = $token;
$self->[_ris_special_identifier_token_]->{$token} = 'sub';
}
# and trim spaces in package statements (added for c250)
elsif ( $type eq 'P' ) {
# clean up spaces in package identifiers, like
# "package Bob::Dog;"
if ( $token =~ s/\s+/ /g ) {
$rtoken_vars->[_TOKEN_] = $token;
$self->[_ris_special_identifier_token_]->{$token} =
'package';
}
}
else {
# it is rare to arrive here (identifier with spaces)
}
}
}
# handle semicolons
elsif ( $type eq ';' ) {
# Remove unnecessary semicolons, but not after bare
# blocks, where it could be unsafe if the brace is
# mis-tokenized.
if (
$rOpts->{'delete-semicolons'}
&& (
(
$last_nonblank_block_type
&& $last_nonblank_code_type eq '}'
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
|| $last_nonblank_block_type =~ /$SUB_PATTERN/
|| $last_nonblank_block_type =~ /^\w+:$/
)
)
|| $last_nonblank_code_type eq ';'
)
)
{
# This looks like a deletable semicolon, but even if a
# semicolon can be deleted it is not necessarily best to do
# so. We apply these additional rules for deletion:
# - Always ok to delete a ';' at the end of a line
# - Never delete a ';' before a '#' because it would
# promote it to a block comment.
# - If a semicolon is not at the end of line, then only
# delete if it is followed by another semicolon or closing
# token. This includes the comment rule. It may take
# two passes to get to a final state, but it is a little
# safer. For example, keep the first semicolon here:
# eval { sub bubba { ok(0) }; ok(0) } || ok(1);
# It is not required but adds some clarity.
my $ok_to_delete = 1;
if ( $KK < $Klast ) {
my $Kn = $self->K_next_nonblank($KK);
if ( defined($Kn) && $Kn <= $Klast ) {
my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
$ok_to_delete = $next_nonblank_token_type eq ';'
|| $next_nonblank_token_type eq '}';
}
}
# do not delete only nonblank token in a file
else {
my $Kp = $self->K_previous_code( undef, $rLL_new );
my $Kn = $self->K_next_nonblank($KK);
$ok_to_delete = defined($Kn) || defined($Kp);
}
if ($ok_to_delete) {
$self->note_deleted_semicolon($input_line_number);
next;
}
else {
write_logfile_entry("Extra ';'\n");
}
}
}
# Old patch to add space to something like "x10".
# Note: This is now done in the Tokenizer, but this code remains
# for reference.
elsif ( $type eq 'n' ) {
if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
$token =~ s/x/x /;
$rtoken_vars->[_TOKEN_] = $token;
if (DEVEL_MODE) {
Fault(<<EOM);
Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
EOM
}
}
}
# check for a qw quote
elsif ( $type eq 'q' ) {
# trim blanks from right of qw quotes
# (To avoid trimming qw quotes use -ntqw; the tokenizer handles
# this)
$token =~ s/\s*$//;
$rtoken_vars->[_TOKEN_] = $token;
if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
$self->note_embedded_tab($input_line_number);
}
if ( $rwhitespace_flags->[$KK] == WS_YES
&& @{$rLL_new}
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
$self->store_token();
}
$self->store_token($rtoken_vars);
next;
} ## end if ( $type eq 'q' )
# delete repeated commas if requested
elsif ( $type eq ',' ) {
if ( $last_nonblank_code_type eq ','
&& $rOpts->{'delete-repeated-commas'} )
{
# Could note this deletion as a possible future update:
## $self->note_deleted_comma($input_line_number);
next;
}
# remember input line index of first comma if -wtc is used
if (%trailing_comma_rules) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno)
&& !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
)
{
$self->[_rfirst_comma_line_index_]->{$seqno} =
$rtoken_vars->[_LINE_INDEX_];
}
}
}
# change 'LABEL :' to 'LABEL:'
elsif ( $type eq 'J' ) {
$token =~ s/\s+//g;
$rtoken_vars->[_TOKEN_] = $token;
}
# check a quote for problems
elsif ( $type eq 'Q' ) {
$self->check_Q( $KK, $Kfirst, $input_line_number )
if ( $self->[_save_logfile_] );
}
else {
## ok - no special processing for this token type
}
# Store this token with possible previous blank
if ( $rwhitespace_flags->[$KK] == WS_YES
&& @{$rLL_new}
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
$self->store_token();
}
$self->store_token($rtoken_vars);
} # End token loop
return;
} ## end sub respace_tokens_inner_loop
sub respace_post_loop_ops {
my ($self) = @_;
# Walk backwards through the tokens, making forward links to sequence items.
if ( @{$rLL_new} ) {
my $KNEXT;
foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
$rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
}
$self->[_K_first_seq_item_] = $KNEXT;
}
# Find and remember lists by sequence number
foreach my $seqno ( keys %{$K_opening_container} ) {
my $K_opening = $K_opening_container->{$seqno};
next unless defined($K_opening);
# code errors may leave undefined closing tokens
my $K_closing = $K_closing_container->{$seqno};
next unless defined($K_closing);
my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
my $line_diff = $lx_close - $lx_open;
$ris_broken_container->{$seqno} = $line_diff;
# See if this is a list
my $is_list;
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ($rtype_count) {
my $comma_count = $rtype_count->{','};
my $fat_comma_count = $rtype_count->{'=>'};
my $semicolon_count = $rtype_count->{';'};
if ( $rtype_count->{'f'} ) {
$semicolon_count += $rtype_count->{'f'};
}
# We will define a list to be a container with one or more commas
# and no semicolons. Note that we have included the semicolons
# in a 'for' container in the semicolon count to keep c-style for
# statements from being formatted as lists.
if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
$is_list = 1;
# We need to do one more check for a parenthesized list:
# At an opening paren following certain tokens, such as 'if',
# we do not want to format the contents as a list.
if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
if ( defined($Kp) ) {
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
$is_list =
$type_p eq 'k'
? !$is_nonlist_keyword{$token_p}
: !$is_nonlist_type{$type_p};
}
}
}
}
# Look for a block brace marked as uncertain. If the tokenizer thinks
# its guess is uncertain for the type of a brace following an unknown
# bareword then it adds a trailing space as a signal. We can fix the
# type here now that we have had a better look at the contents of the
# container. This fixes case b1085. To find the corresponding code in
# Tokenizer.pm search for 'b1085' with an editor.
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
# Always remove the trailing space
$block_type =~ s/\s+$//;
# Try to filter out parenless sub calls
my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
my $Knn2;
if ( defined($Knn1) ) {
$Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
}
my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
# if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
$is_list = 0;
}
# Convert to a hash brace if it looks like it holds a list
if ($is_list) {
$block_type = EMPTY_STRING;
}
$rblock_type_of_seqno->{$seqno} = $block_type;
}
# Handle a list container
if ( $is_list && !$block_type ) {
$ris_list_by_seqno->{$seqno} = $seqno;
my $seqno_parent = $rparent_of_seqno->{$seqno};
my $depth = 0;
while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
$depth++;
# for $rhas_list we need to save the minimum depth
if ( !$rhas_list->{$seqno_parent}
|| $rhas_list->{$seqno_parent} > $depth )
{
$rhas_list->{$seqno_parent} = $depth;
}
if ($line_diff) {
$rhas_broken_list->{$seqno_parent} = 1;
# Patch1: We need to mark broken lists with non-terminal
# line-ending commas for the -bbx=2 parameter. This insures
# that the list will stay broken. Otherwise the flag
# -bbx=2 can be unstable. This fixes case b789 and b938.
# Patch2: Updated to also require either one fat comma or
# one more line-ending comma. Fixes cases b1069 b1070
# b1072 b1076.
if (
$rlec_count_by_seqno->{$seqno}
&& ( $rlec_count_by_seqno->{$seqno} > 1
|| $rtype_count_by_seqno->{$seqno}->{'=>'} )
)
{
$rhas_broken_list_with_lec->{$seqno_parent} = 1;
}
}
$seqno_parent = $rparent_of_seqno->{$seqno_parent};
}
}
# Handle code blocks ...
# The -lp option needs to know if a container holds a code block
elsif ( $block_type && $rOpts_line_up_parentheses ) {
my $seqno_parent = $rparent_of_seqno->{$seqno};
while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
$rhas_code_block->{$seqno_parent} = 1;
$rhas_broken_code_block->{$seqno_parent} = $line_diff;
$seqno_parent = $rparent_of_seqno->{$seqno_parent};
}
}
else {
## ok - none of the above
}
}
# Find containers with ternaries, needed for -lp formatting.
foreach my $seqno ( keys %{$K_opening_ternary} ) {
my $seqno_parent = $rparent_of_seqno->{$seqno};
while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
$rhas_ternary->{$seqno_parent} = 1;
$seqno_parent = $rparent_of_seqno->{$seqno_parent};
}
}
# Turn off -lp for containers with here-docs with text within a container,
# since they have their own fixed indentation. Fixes case b1081.
if ($rOpts_line_up_parentheses) {
foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
my $Kh = $K_first_here_doc_by_seqno{$seqno};
my $Kc = $K_closing_container->{$seqno};
my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
next if ( $line_Kh == $line_Kc );
$ris_excluded_lp_container->{$seqno} = 1;
}
}
# Set a flag to turn off -cab=3 in complex structures. Otherwise,
# instability can occur. When it is overridden the behavior of the closest
# match, -cab=2, will be used instead. This fixes cases b1096 b1113.
if ( $rOpts_comma_arrow_breakpoints == 3 ) {
foreach my $seqno ( keys %{$K_opening_container} ) {
my $rtype_count = $rtype_count_by_seqno->{$seqno};
next unless ( $rtype_count && $rtype_count->{'=>'} );
# override -cab=3 if this contains a sub-list
if ( !defined( $roverride_cab3->{$seqno} ) ) {
if ( $rhas_list->{$seqno} ) {
$roverride_cab3->{$seqno} = 2;
}
# or if this is a sub-list of its parent container
else {
my $seqno_parent = $rparent_of_seqno->{$seqno};
if ( defined($seqno_parent)
&& $ris_list_by_seqno->{$seqno_parent} )
{
$roverride_cab3->{$seqno} = 2;
}
}
}
}
}
return;
} ## end sub respace_post_loop_ops
sub set_permanently_broken {
my ( $self, $seqno ) = @_;
# Mark this container, and all of its parent containers, as being
# permanently broken (for example, by containing a blank line). This
# is needed for certain list formatting operations.
while ( defined($seqno) ) {
$ris_permanently_broken->{$seqno} = 1;
$seqno = $rparent_of_seqno->{$seqno};
}
return;
} ## end sub set_permanently_broken
sub store_token {
my ( $self, $item ) = @_;
#------------------------------------------
# Store one token during respace operations
#------------------------------------------
# Input parameter:
# if defined => reference to a token
# if undef => make and store a blank space
# NOTE: called once per token so coding efficiency is critical.
# If no arg, then make and store a blank space
if ( !$item ) {
# - Never start the array with a space, and
# - Never store two consecutive spaces
if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
# Note that the level and ci_level of newly created spaces should
# be the same as the previous token. Otherwise the coding for the
# -lp option can create a blinking state in some rare cases.
# (see b1109, b1110).
$item = [];
$item->[_TYPE_] = 'b';
$item->[_TOKEN_] = SPACE;
$item->[_TYPE_SEQUENCE_] = EMPTY_STRING;
$item->[_LINE_INDEX_] = $rLL_new->[-1]->[_LINE_INDEX_];
$item->[_LEVEL_] = $rLL_new->[-1]->[_LEVEL_];
}
else { return }
}
# The next multiple assignment statements are significantly faster than
# doing them one-by-one.
my (
$type,
$token,
$type_sequence,
) = @{$item}[
_TYPE_,
_TOKEN_,
_TYPE_SEQUENCE_,
];
# Set the token length. Later it may be adjusted again if phantom or
# ignoring side comment lengths. It is always okay to calculate the length
# with $length_function->() if it is defined, but it is extremely slow so
# we avoid it and use the builtin length() for printable ascii tokens.
# Note: non-printable ascii characters (like tab) may get different lengths
# by the two methods, so we have to use $length_function for them.
my $token_length =
( $length_function
&& !$is_ascii_type{$type}
&& $token =~ /[[:^ascii:][:^print:]]/ )
? $length_function->($token)
: length($token);
# handle blanks
if ( $type eq 'b' ) {
# Do not output consecutive blanks. This situation should have been
# prevented earlier, but it is worth checking because later routines
# make this assumption.
if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
return;
}
}
# handle comments
elsif ( $type eq '#' ) {
# trim comments if necessary
my $ord = ord( substr( $token, -1, 1 ) );
if (
$ord > 0
&& ( $ord < ORD_PRINTABLE_MIN
|| $ord > ORD_PRINTABLE_MAX )
&& $token =~ s/\s+$//
)
{
$token_length =
$length_function ? $length_function->($token) : length($token);
$item->[_TOKEN_] = $token;
}
my $ignore_sc_length = $rOpts_ignore_side_comment_lengths;
# Ignore length of '## no critic' comments even if -iscl is not set
if ( !$ignore_sc_length
&& !$rOpts_ignore_perlcritic_comments
&& $token_length > 10
&& substr( $token, 1, 1 ) eq '#'
&& $token =~ /^##\s*no\s+critic\b/ )
{
# Is it a side comment or a block comment?
if ( $Ktoken_vars > $Kfirst_old ) {
# This is a side comment. If we do not ignore its length, and
# -iscl has not been set, then the line could be broken and
# perlcritic will complain. So this is essential:
$ignore_sc_length ||= 1;
# It would be a good idea to also make this behave like a
# static side comment, but this is not essential and would
# change existing formatting. So we will leave it to the user
# to set -ssc if desired.
}
else {
# This is a full-line (block) comment.
# It would be a good idea to make this behave like a static
# block comment, but this is not essential and would change
# existing formatting. So we will leave it to the user to
# set -sbc if desired
}
}
# Set length of ignored side comments as just 1
if ( $ignore_sc_length && ( !$CODE_type || $CODE_type eq 'HSC' ) ) {
$token_length = 1;
}
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$self->[_rblank_and_comment_count_]->{$seqno} += 1
if ( $CODE_type eq 'BC' );
$self->set_permanently_broken($seqno)
if !$ris_permanently_broken->{$seqno};
}
}
# handle non-blanks and non-comments
else {
my $block_type;
# check for a sequenced item (i.e., container or ?/:)
if ($type_sequence) {
# This will be the index of this item in the new array
my $KK_new = @{$rLL_new};
if ( $is_opening_token{$token} ) {
$K_opening_container->{$type_sequence} = $KK_new;
$block_type = $rblock_type_of_seqno->{$type_sequence};
# Fix for case b1100: Count a line ending in ', [' as having
# a line-ending comma. Otherwise, these commas can be hidden
# with something like --opening-square-bracket-right
if ( $last_nonblank_code_type eq ','
&& $Ktoken_vars == $Klast_old_code
&& $Ktoken_vars > $Kfirst_old )
{
$rlec_count_by_seqno->{$type_sequence}++;
}
if ( $last_nonblank_code_type eq '='
|| $last_nonblank_code_type eq '=>' )
{
$ris_assigned_structure->{$type_sequence} =
$last_nonblank_code_type;
}
my $seqno_parent = $seqno_stack{ $depth_next - 1 };
$seqno_parent = SEQ_ROOT unless defined($seqno_parent);
push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
$rparent_of_seqno->{$type_sequence} = $seqno_parent;
$seqno_stack{$depth_next} = $type_sequence;
$K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
$depth_next++;
if ( $depth_next > $depth_next_max ) {
$depth_next_max = $depth_next;
}
}
elsif ( $is_closing_token{$token} ) {
$K_closing_container->{$type_sequence} = $KK_new;
$block_type = $rblock_type_of_seqno->{$type_sequence};
# Do not include terminal commas in counts
if ( $last_nonblank_code_type eq ','
|| $last_nonblank_code_type eq '=>' )
{
$rtype_count_by_seqno->{$type_sequence}
->{$last_nonblank_code_type}--;
if ( $Ktoken_vars == $Kfirst_old
&& $last_nonblank_code_type eq ','
&& $rlec_count_by_seqno->{$type_sequence} )
{
$rlec_count_by_seqno->{$type_sequence}--;
}
}
# Update the stack...
$depth_next--;
}
else {
# For ternary, note parent but do not include as child
my $seqno_parent = $seqno_stack{ $depth_next - 1 };
$seqno_parent = SEQ_ROOT unless defined($seqno_parent);
$rparent_of_seqno->{$type_sequence} = $seqno_parent;
# These are not yet used but could be useful
if ( $token eq '?' ) {
$K_opening_ternary->{$type_sequence} = $KK_new;
}
elsif ( $token eq ':' ) {
$K_closing_ternary->{$type_sequence} = $KK_new;
}
else {
# We really shouldn't arrive here, just being cautious:
# The only sequenced types output by the tokenizer are the
# opening & closing containers and the ternary types. Each
# of those was checked above. So we would only get here
# if the tokenizer has been changed to mark some other
# tokens with sequence numbers.
if (DEVEL_MODE) {
Fault(
"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
);
}
}
}
}
# Remember the most recent two non-blank, non-comment tokens.
# NOTE: the phantom semicolon code may change the output stack
# without updating these values. Phantom semicolons are considered
# the same as blanks for now, but future needs might change that.
# See the related note in sub 'add_phantom_semicolon'.
$last_last_nonblank_code_type = $last_nonblank_code_type;
$last_last_nonblank_code_token = $last_nonblank_code_token;
$last_nonblank_code_type = $type;
$last_nonblank_code_token = $token;
$last_nonblank_block_type = $block_type;
# count selected types
if ( $is_counted_type{$type} ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$rtype_count_by_seqno->{$seqno}->{$type}++;
# Count line-ending commas for -bbx
if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
$rlec_count_by_seqno->{$seqno}++;
}
# Remember index of first here doc target
if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
my $KK_new = @{$rLL_new};
$K_first_here_doc_by_seqno{$seqno} = $KK_new;
}
}
}
}
# cumulative length is the length sum including this token
$cumulative_length += $token_length;
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
$item->[_TOKEN_LENGTH_] = $token_length;
# For reference, here is how to get the parent sequence number.
# This is not used because it is slower than finding it on the fly
# in sub parent_seqno_by_K:
# my $seqno_parent =
# $type_sequence && $is_opening_token{$token}
# ? $seqno_stack{ $depth_next - 2 }
# : $seqno_stack{ $depth_next - 1 };
# my $KK = @{$rLL_new};
# $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
# and finally, add this item to the new array
push @{$rLL_new}, $item;
return;
} ## end sub store_token
sub add_phantom_semicolon {
my ( $self, $KK ) = @_;
# The token at old index $KK is a closing block brace, and not preceded
# by a semicolon. Before we push it onto the new token list, we may
# want to add a phantom semicolon which can be activated if the the
# block is broken on output.
# We are only adding semicolons for certain block types
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
return unless ($type_sequence);
my $block_type = $rblock_type_of_seqno->{$type_sequence};
return unless ($block_type);
return
unless ( $ok_to_add_semicolon_for_block_type{$block_type}
|| $block_type =~ /^(sub|package)/
|| $block_type =~ /^\w+\:$/ );
# Find the most recent token in the new token list
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) ); # shouldn't happen except for bad input
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
# Do not add a semicolon if...
return
if (
# it would follow a comment (and be isolated)
$type_p eq '#'
# it follows a code block ( because they are not always wanted
# there and may add clutter)
|| $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
# it would follow a label
|| $type_p eq 'J'
# it would be inside a 'format' statement (and cause syntax error)
|| ( $type_p eq 'k'
&& $token_p =~ /format/ )
);
# Do not add a semicolon if it would impede a weld with an immediately
# following closing token...like this
# { ( some code ) }
# ^--No semicolon can go here
# look at the previous token... note use of the _NEW rLL array here,
# but sequence numbers are invariant.
my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
# If it is also a CLOSING token we have to look closer...
if (
$seqno_inner
&& $is_closing_token{$token_p}
# we only need to look if there is just one inner container..
&& defined( $rchildren_of_seqno->{$type_sequence} )
&& @{ $rchildren_of_seqno->{$type_sequence} } == 1
)
{
# Go back and see if the corresponding two OPENING tokens are also
# together. Note that we are using the OLD K indexing here:
my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
if ( defined($K_outer_opening) ) {
my $K_nxt = $self->K_next_nonblank($K_outer_opening);
if ( defined($K_nxt) ) {
my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
# Is the next token after the outer opening the same as
# our inner closing (i.e. same sequence number)?
# If so, do not insert a semicolon here.
return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
}
}
}
# We will insert an empty semicolon here as a placeholder. Later, if
# it becomes the last token on a line, we will bring it to life. The
# advantage of doing this is that (1) we just have to check line
# endings, and (2) the phantom semicolon has zero width and therefore
# won't cause needless breaks of one-line blocks.
my $Ktop = -1;
if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
&& $want_left_space{';'} == WS_NO )
{
# convert the blank into a semicolon..
# be careful: we are working on the new stack top
# on a token which has been stored.
my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
# Convert the existing blank to:
# a phantom semicolon for one_line_block option = 0 or 1
# a real semicolon for one_line_block option = 2
my $tok = EMPTY_STRING;
my $len_tok = 0;
if ( $rOpts_one_line_block_semicolons == 2 ) {
$tok = ';';
$len_tok = 1;
}
$rLL_new->[$Ktop]->[_TOKEN_] = $tok;
$rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
$rLL_new->[$Ktop]->[_TYPE_] = ';';
$self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
# NOTE: we are changing the output stack without updating variables
# $last_nonblank_code_type, etc. Future needs might require that
# those variables be updated here. For now, it seems ok to skip
# this.
# Then store a new blank
$self->store_token($rcopy);
}
else {
# Patch for issue c078: keep line indexes in order. If the top
# token is a space that we are keeping (due to '-wls=';') then
# we have to check that old line indexes stay in order.
# In very rare
# instances in which side comments have been deleted and converted
# into blanks, we may have filtered down multiple blanks into just
# one. In that case the top blank may have a higher line number
# than the previous nonblank token. Although the line indexes of
# blanks are not really significant, we need to keep them in order
# in order to pass error checks.
if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
if ( $new_top_ix < $old_top_ix ) {
$rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
}
}
my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
$self->store_token($rcopy);
}
return;
} ## end sub add_phantom_semicolon
sub add_trailing_comma {
# Implement the --add-trailing-commas flag to the line end before index $KK:
my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
# Input parameter:
# $KK = index of closing token in old ($rLL) token list
# which starts a new line and is not preceded by a comma
# $Kfirst = index of first token on the current line of input tokens
# $add_flags = user control flags
# For example, we might want to add a comma here:
# bless {
# _name => $name,
# _price => $price,
# _rebate => $rebate <------ location of possible bare comma
# }, $pkg;
# ^-------------------closing token at index $KK on new line
# Do not add a comma if it would follow a comment
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
return if ( $type_p eq '#' );
# see if the user wants a trailing comma here
my $match =
$self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
$trailing_comma_rule, 1 );
# b1458 fix method 1: do not add if this would excess line length.
# This is more general than fix method 2, below, but the logic is not
# as clean. So this fix is currently deactivated.
if ( 0 && $match && $rOpts_delete_trailing_commas && $KK > 0 ) {
my $line_index = $rLL->[ $KK - 1 ]->[_LINE_INDEX_];
my $rlines = $self->[_rlines_];
my $line_of_tokens = $rlines->[$line_index];
my $input_line = $line_of_tokens->{_line_text};
my $len =
$length_function
? $length_function->($input_line) - 1
: length($input_line) - 1;
my $level = $rLL->[$Kfirst]->[_LEVEL_];
my $max_len = $maximum_line_length_at_level[$level];
if ( $len >= $max_len ) {
$match = 0;
}
}
# if so, add a comma
if ($match) {
my $Knew = $self->store_new_token( ',', ',', $Kp );
}
return;
} ## end sub add_trailing_comma
sub delete_trailing_comma {
my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
# Apply the --delete-trailing-commas flag to the comma before index $KK
# Input parameter:
# $KK = index of a closing token in OLD ($rLL) token list
# which is preceded by a comma on the same line.
# $Kfirst = index of first token on the current line of input tokens
# $delete_option = user control flag
# Returns true if the comma was deleted
# For example, we might want to delete this comma:
# my @asset = ("FASMX", "FASGX", "FASIX",);
# | |^--------token at index $KK
# | ^------comma of interest
# ^-------------token at $Kfirst
# Verify that the previous token is a comma. Note that we are working in
# the new token list $rLL_new.
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
# there must be a '#' between the ',' and closing token; give up.
return;
}
# Do not delete commas when formatting under stress to avoid instability.
# This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
# been found to work well for trailing commas.
if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
return;
}
# See if the user wants this trailing comma
my $match =
$self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
$trailing_comma_rule, 0 );
# Patch: the --noadd-whitespace flag can cause instability in complex
# structures. In this case do not delete the comma. Fixes b1409.
if ( !$match && !$rOpts_add_whitespace ) {
my $Kn = $self->K_next_nonblank($KK);
if ( defined($Kn) ) {
my $type_n = $rLL->[$Kn]->[_TYPE_];
if ( $type_n ne ';' && $type_n ne '#' ) { return }
}
}
# b1458 fix method 2: do not remove a comma after a leading brace type 'R'
# since it is under stress and could become unstable. This is a more
# specific fix but the logic is cleaner than method 1.
if ( !$match
&& $rOpts_add_trailing_commas
&& $rLL->[$Kfirst]->[_TYPE_] eq 'R' )
{
# previous old token should be the comma..
my $Kp_old = $self->K_previous_nonblank( $KK, $rLL );
if ( defined($Kp_old)
&& $Kp_old > $Kfirst
&& $rLL->[$Kp_old]->[_TYPE_] eq ',' )
{
# if the comma follows the first token of the line ..
my $Kpp_old = $self->K_previous_nonblank( $Kp_old, $rLL );
if ( defined($Kpp_old) && $Kpp_old eq $Kfirst ) {
# do not delete it
$match = 1;
}
}
}
# If no match, delete it
if ( !$match ) {
return $self->unstore_last_nonblank_token(',');
}
return;
} ## end sub delete_trailing_comma
sub delete_weld_interfering_comma {
my ( $self, $KK ) = @_;
# Apply the flag '--delete-weld-interfering-commas' to the comma
# before index $KK
# Input parameter:
# $KK = index of a closing token in OLD ($rLL) token list
# which is preceded by a comma on the same line.
# Returns true if the comma was deleted
# For example, we might want to delete this comma:
# my $tmpl = { foo => {no_override => 1, default => 42}, };
# || ^------$KK
# |^---$Kp
# $Kpp---^
#
# Note that:
# index $KK is in the old $rLL array, but
# indexes $Kp and $Kpp are in the new $rLL_new array.
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
return unless ($type_sequence);
# Find the previous token and verify that it is a comma.
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
# it is not a comma, so give up ( it is probably a '#' )
return;
}
# This must be the only comma in this list
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
return
unless ( defined($rtype_count)
&& $rtype_count->{','}
&& $rtype_count->{','} == 1 );
# Back up to the previous closing token
my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
return unless ( defined($Kpp) );
my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
# The containers must be nesting (i.e., sequence numbers must differ by 1 )
if ( $seqno_pp && $is_closing_type{$type_pp} ) {
if ( $seqno_pp == $type_sequence + 1 ) {
# remove the ',' from the top of the new token list
return $self->unstore_last_nonblank_token(',');
}
}
return;
} ## end sub delete_weld_interfering_comma
sub unstore_last_nonblank_token {
my ( $self, $type ) = @_;
# remove the most recent nonblank token from the new token list
# Input parameter:
# $type = type to be removed (for safety check)
# Returns true if success
# false if error
# This was written and is used for removing commas, but might
# be useful for other tokens. If it is ever used for other tokens
# then the issue of what to do about the other variables, such
# as token counts and the '$last...' vars needs to be considered.
# Safety check, shouldn't happen
if ( @{$rLL_new} < 3 ) {
DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
return;
}
my ( $rcomma, $rblank );
# case 1: pop comma from top of stack
if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
$rcomma = pop @{$rLL_new};
}
# case 2: pop blank and then comma from top of stack
elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
&& $rLL_new->[-2]->[_TYPE_] eq $type )
{
$rblank = pop @{$rLL_new};
$rcomma = pop @{$rLL_new};
}
# case 3: error, shouldn't happen unless bad call
else {
DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
return;
}
# A note on updating vars set by sub store_token for this comma: If we
# reduce the comma count by 1 then we also have to change the variable
# $last_nonblank_code_type to be $last_last_nonblank_code_type because
# otherwise sub store_token is going to ALSO reduce the comma count.
# Alternatively, we can leave the count alone and the
# $last_nonblank_code_type alone. Then sub store_token will produce
# the correct result. This is simpler and is done here.
# Now add a blank space after the comma if appropriate.
# Some unusual spacing controls might need another iteration to
# reach a final state.
if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
if ( defined($rblank) ) {
$rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma
push @{$rLL_new}, $rblank;
}
}
return 1;
} ## end sub unstore_last_nonblank_token
sub match_trailing_comma_rule {
my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_;
# Decide if a trailing comma rule is matched.
# Input parameter:
# $KK = index of closing token in old ($rLL) token list which follows
# the location of a possible trailing comma. See diagram below.
# $Kfirst = (old) index of first token on the current line of input tokens
# $Kp = index of previous nonblank token in new ($rLL_new) array
# $trailing_comma_rule = packed user control flags
# $if_add = true if adding comma, false if deleting comma
# Returns:
# false if no match
# true if match
# For example, we might be checking for addition of a comma here:
# bless {
# _name => $name,
# _price => $price,
# _rebate => $rebate <------ location of possible trailing comma
# }, $pkg;
# ^-------------------closing token at index $KK
return unless ($trailing_comma_rule);
my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule};
# List of $trailing_comma_style values:
# undef stable: do not change
# '0' : no list should have a trailing comma
# '1' or '*' : every list should have a trailing comma
# 'm' a multi-line list should have a trailing commas
# 'b' trailing commas should be 'bare' (comma followed by newline)
# 'h' lists of key=>value pairs with a bare trailing comma
# 'i' same as s=h but also include any list with no more than about one
# comma per line
# ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
# Note: an interesting generalization would be to let an upper case
# letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
# be useful for undoing operations. It would be implemented as a wrapper
# around this routine.
#-----------------------------------------
# No style defined : do not add or delete
#-----------------------------------------
if ( !defined($trailing_comma_style) ) { return !$if_add }
#----------------------------------------
# Set some flags describing this location
#----------------------------------------
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
return unless ($type_sequence);
my $closing_token = $rLL->[$KK]->[_TOKEN_];
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
return unless ( defined($rtype_count) && $rtype_count->{','} );
my $is_permanently_broken =
$self->[_ris_permanently_broken_]->{$type_sequence};
# Note that _ris_broken_container_ also stores the line diff
# but it is not available at this early stage.
my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
return if ( !defined($K_opening) );
# multiline definition 1: opening and closing tokens on different lines
my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
my $line_diff_containers = $iline_c - $iline_o;
my $has_multiline_containers = $line_diff_containers > 0;
# multiline definition 2: first and last commas on different lines
my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence};
my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_];
my $has_multiline_commas;
my $line_diff_commas = 0;
if ( !defined($iline_first) ) {
# shouldn't happen if caller checked comma count
my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
Fault(
"at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
) if (DEVEL_MODE);
}
else {
$line_diff_commas = $iline_last - $iline_first;
$has_multiline_commas = $line_diff_commas > 0;
}
# To avoid instability in edge cases, when adding commas we uses the
# multiline_commas definition, but when deleting we use multiline
# containers. This fixes b1384, b1396, b1397, b1398, b1400.
my $is_multiline =
$if_add ? $has_multiline_commas : $has_multiline_containers;
my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst;
my $match;
#----------------------------
# 0 : does not match any list
#----------------------------
if ( $trailing_comma_style eq '0' ) {
$match = 0;
}
#------------------------------
# '*' or '1' : matches any list
#------------------------------
elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
$match = 1;
}
#-----------------------------
# 'm' matches a Multiline list
#-----------------------------
elsif ( $trailing_comma_style eq 'm' ) {
$match = $is_multiline;
}
#----------------------------------
# 'b' matches a Bare trailing comma
#----------------------------------
elsif ( $trailing_comma_style eq 'b' ) {
$match = $is_bare_multiline_comma;
}
#--------------------------------------------------------------------------
# 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
# 'i' matches a bare stable list with about 1 comma per line.
#--------------------------------------------------------------------------
elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
# We can treat these together because they are similar.
# The set of 'i' matches includes the set of 'h' matches.
# the trailing comma must be bare for both 'h' and 'i'
return if ( !$is_bare_multiline_comma );
# There must be no more than one comma per line for both 'h' and 'i'
# The new_comma_count here will include the trailing comma.
my $new_comma_count = $rtype_count->{','};
$new_comma_count += 1 if ($if_add);
my $excess_commas = $new_comma_count - $line_diff_commas - 1;
if ( $excess_commas > 0 ) {
# Exception for a special edge case for option 'i': if the trailing
# comma is followed by a blank line or comment, then it cannot be
# covered. Then we can safely accept a small list to avoid
# instability (issue b1443).
if ( $trailing_comma_style eq 'i'
&& $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1
&& $new_comma_count <= 2 )
{
$match = 1;
}
# Patch for instability issue b1456: -boc can trick this test; so
# skip it when deleting commas to avoid possible instability
# with option 'h' in combination with -atc -dtc -boc;
elsif (
$trailing_comma_style eq 'h'
# this is a deletion (due to -dtc)
&& !$if_add
# -atc is also set
&& $rOpts_add_trailing_commas
# -boc is set and active
&& $rOpts_break_at_old_comma_breakpoints
&& !$rOpts_ignore_old_breakpoints
)
{
# ignore this test
}
else {
return;
}
}
# a list of key=>value pairs with at least 2 fat commas is a match
# for both 'h' and 'i'
my $fat_comma_count = $rtype_count->{'=>'};
if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) {
# comma count (including trailer) and fat comma count must differ by
# by no more than 1. This allows for some small variations.
my $comma_diff = $new_comma_count - $fat_comma_count;
$match = ( $comma_diff >= -1 && $comma_diff <= 1 );
}
# For 'i' only, a list that can be shown to be stable is a match
if ( !$match && $trailing_comma_style eq 'i' ) {
$match = (
$is_permanently_broken
|| ( $rOpts_break_at_old_comma_breakpoints
&& !$rOpts_ignore_old_breakpoints )
);
}
}
#-------------------------------------------------------------------------
# Unrecognized parameter. This should have been caught in the input check.
#-------------------------------------------------------------------------
else {
DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
# do not add or delete
return !$if_add;
}
# Now do any special paren check
if ( $match
&& $paren_flag
&& $paren_flag ne '1'
&& $paren_flag ne '*'
&& $closing_token eq ')' )
{
$match &&=
$self->match_paren_control_flag( $type_sequence, $paren_flag,
$rLL_new );
}
# Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
# for use by -vtc logic to avoid instability when -dtc and -atc are both
# active.
if ($match) {
if ( $if_add && $rOpts_delete_trailing_commas
|| !$if_add && $rOpts_add_trailing_commas )
{
$self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
# The combination of -atc and -dtc and -cab=3 can be unstable
# (b1394). So we deactivate -cab=3 in this case.
# A value of '0' or '4' is required for stability of case b1451.
if ( $rOpts_comma_arrow_breakpoints == 3 ) {
$self->[_roverride_cab3_]->{$type_sequence} = 0;
}
}
}
return $match;
} ## end sub match_trailing_comma_rule
sub store_new_token {
my ( $self, $type, $token, $Kp ) = @_;
# Create and insert a completely new token into the output stream
# Input parameters:
# $type = the token type
# $token = the token text
# $Kp = index of the previous token in the new list, $rLL_new
# Returns:
# $Knew = index in $rLL_new of the new token
# This operation is a little tricky because we are creating a new token and
# we have to take care to follow the requested whitespace rules.
my $Ktop = @{$rLL_new} - 1;
my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
my $Knew;
if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
#----------------------------------------------------
# Method 1: Convert the top blank into the new token.
#----------------------------------------------------
# Be Careful: we are working on the top of the new stack, on a token
# which has been stored.
my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
$Knew = $Ktop;
$rLL_new->[$Knew]->[_TOKEN_] = $token;
$rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token);
$rLL_new->[$Knew]->[_TYPE_] = $type;
# NOTE: we are changing the output stack without updating variables
# $last_nonblank_code_type, etc. Future needs might require that
# those variables be updated here. For now, we just update the
# type counts as necessary.
if ( $is_counted_type{$type} ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ($seqno) {
$self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
}
}
# Then store a new blank
$self->store_token($rcopy);
}
else {
#----------------------------------------
# Method 2: Use the normal storage method
#----------------------------------------
# Patch for issue c078: keep line indexes in order. If the top
# token is a space that we are keeping (due to '-wls=...) then
# we have to check that old line indexes stay in order.
# In very rare
# instances in which side comments have been deleted and converted
# into blanks, we may have filtered down multiple blanks into just
# one. In that case the top blank may have a higher line number
# than the previous nonblank token. Although the line indexes of
# blanks are not really significant, we need to keep them in order
# in order to pass error checks.
if ($top_is_space) {
my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
if ( $new_top_ix < $old_top_ix ) {
$rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
}
}
my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
$self->store_token($rcopy);
$Knew = @{$rLL_new} - 1;
}
return $Knew;
} ## end sub store_new_token
sub check_Q {
# Check that a quote looks okay, and report possible problems
# to the logfile.
my ( $self, $KK, $Kfirst, $line_number ) = @_;
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $token =~ /\t/ ) {
$self->note_embedded_tab($line_number);
}
# The remainder of this routine looks for something like
# '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
# Start by looking for a token beginning with one of: s y m / tr
return
unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
|| substr( $token, 0, 2 ) eq 'tr' );
# ... and preceded by one of: = == !=
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
return unless ( $is_unexpected_equals{$previous_nonblank_type} );
my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
my $previous_nonblank_type_2 = 'b';
my $previous_nonblank_token_2 = EMPTY_STRING;
my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
if ( defined($Kpp) ) {
$previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
$previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
}
my $next_nonblank_token = EMPTY_STRING;
my $Kn = $KK + 1;
my $Kmax = @{$rLL} - 1;
if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
if ( $Kn <= $Kmax ) {
$next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
}
my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
if (
# preceded by simple scalar
$previous_nonblank_type_2 eq 'i'
&& $previous_nonblank_token_2 =~ /^\$/
# followed by some kind of termination
# (but give complaint if we can not see far enough ahead)
&& $next_nonblank_token =~ /^[; \)\}]$/
# scalar is not declared
## =~ /^(my|our|local)$/
&& !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
)
{
my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
complain(
"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
);
}
return;
} ## end sub check_Q
} ## end closure respace_tokens
sub copy_token_as_type {
# This provides a quick way to create a new token by
# slightly modifying an existing token.
my ( $rold_token, $type, $token ) = @_;
if ( !defined($token) ) {
if ( $type eq 'b' ) {
$token = SPACE;
}
elsif ( $type eq 'q' ) {
$token = EMPTY_STRING;
}
elsif ( $type eq '->' ) {
$token = '->';
}
elsif ( $type eq ';' ) {
$token = ';';
}
elsif ( $type eq ',' ) {
$token = ',';
}
else {
# Unexpected type ... this sub will work as long as both $token and
# $type are defined, but we should catch any unexpected types during
# development.
if (DEVEL_MODE) {
Fault(<<EOM);
sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
EOM
}
# Shouldn't get here
$token = $type;
}
}
my @rnew_token = @{$rold_token};
$rnew_token[_TYPE_] = $type;
$rnew_token[_TOKEN_] = $token;
$rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
return \@rnew_token;
} ## end sub copy_token_as_type
sub K_next_code {
my ( $self, $KK, $rLL ) = @_;
# return the index K of the next nonblank, non-comment token
return if ( !defined($KK) );
return if ( $KK < 0 );
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] if ( !defined($rLL) );
my $Num = @{$rLL};
my $Knnb = $KK + 1;
while ( $Knnb < $Num ) {
if ( !defined( $rLL->[$Knnb] ) ) {
# We seem to have encountered a gap in our array.
# This shouldn't happen because sub write_line() pushed
# items into the $rLL array.
Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
return;
}
if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
&& $rLL->[$Knnb]->[_TYPE_] ne '#' )
{
return $Knnb;
}
$Knnb++;
}
return;
} ## end sub K_next_code
sub K_next_nonblank {
my ( $self, $KK, $rLL ) = @_;
# return the index K of the next nonblank token, or
# return undef if none
return if ( !defined($KK) );
return if ( $KK < 0 );
# The third arg allows this routine to be used on any array. This is
# useful in sub respace_tokens when we are copying tokens from an old $rLL
# to a new $rLL array. But usually the third arg will not be given and we
# will just use the $rLL array in $self.
$rLL = $self->[_rLL_] if ( !defined($rLL) );
my $Num = @{$rLL};
my $Knnb = $KK + 1;
return if ( $Knnb >= $Num );
return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
return if ( ++$Knnb >= $Num );
return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
# Backup loop. Very unlikely to get here; it means we have neighboring
# blanks in the token stream.
$Knnb++;
while ( $Knnb < $Num ) {
# Safety check, this fault shouldn't happen: The $rLL array is the
# main array of tokens, so all entries should be used. It is
# initialized in sub write_line, and then re-initialized by sub
# store_token() within sub respace_tokens. Tokens are pushed on
# so there shouldn't be any gaps.
if ( !defined( $rLL->[$Knnb] ) ) {
Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
return;
}
if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
$Knnb++;
}
return;
} ## end sub K_next_nonblank
sub K_previous_code {
# return the index K of the previous nonblank, non-comment token
# Call with $KK=undef to start search at the top of the array
my ( $self, $KK, $rLL ) = @_;
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
if ( !defined($KK) ) { $KK = $Num }
if ( $KK > $Num ) {
# This fault can be caused by a programming error in which a bad $KK is
# given. The caller should make the first call with KK_new=undef to
# avoid this error.
Fault(
"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
) if (DEVEL_MODE);
return;
}
my $Kpnb = $KK - 1;
while ( $Kpnb >= 0 ) {
if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
&& $rLL->[$Kpnb]->[_TYPE_] ne '#' )
{
return $Kpnb;
}
$Kpnb--;
}
return;
} ## end sub K_previous_code
sub K_previous_nonblank {
# return index of previous nonblank token before item K;
# Call with $KK=undef to start search at the top of the array
my ( $self, $KK, $rLL ) = @_;
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
if ( !defined($KK) ) { $KK = $Num }
if ( $KK > $Num ) {
# This fault can be caused by a programming error in which a bad $KK is
# given. The caller should make the first call with KK_new=undef to
# avoid this error.
Fault(
"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
) if (DEVEL_MODE);
return;
}
my $Kpnb = $KK - 1;
return if ( $Kpnb < 0 );
return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
return if ( --$Kpnb < 0 );
return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
# Backup loop. We should not get here unless some routine
# slipped repeated blanks into the token stream.
return if ( --$Kpnb < 0 );
while ( $Kpnb >= 0 ) {
if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
$Kpnb--;
}
return;
} ## end sub K_previous_nonblank
sub parent_seqno_by_K {
# Return the sequence number of the parent container of token K, if any.
my ( $self, $KK ) = @_;
my $rLL = $self->[_rLL_];
# The task is to jump forward to the next container token
# and use the sequence number of either it or its parent.
# For example, consider the following with seqno=5 of the '[' and ']'
# being called with index K of the first token of each line:
# # result
# push @tests, # -
# [ # -
# sub { 99 }, 'do {&{%s} for 1,2}', # 5
# '(&{})(&{})', undef, # 5
# [ 2, 2, 0 ], 0 # 5
# ]; # -
# NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
# unbalanced files, last sequence number will either be undefined or it may
# be at a deeper level. In either case we will just return SEQ_ROOT to
# have a defined value and allow formatting to proceed.
my $parent_seqno = SEQ_ROOT;
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ($type_sequence) {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
else {
my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
if ( defined($Kt) ) {
$type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
my $type = $rLL->[$Kt]->[_TYPE_];
# if next container token is closing, it is the parent seqno
if ( $is_closing_type{$type} ) {
$parent_seqno = $type_sequence;
}
# otherwise we want its parent container
else {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
}
}
$parent_seqno = SEQ_ROOT if ( !defined($parent_seqno) );
return $parent_seqno;
} ## end sub parent_seqno_by_K
sub is_in_block_by_i {
my ( $self, $i ) = @_;
# returns true if
# token at i is contained in a BLOCK
# or is at root level
# or there is some kind of error (i.e. unbalanced file)
# returns false otherwise
if ( $i < 0 ) {
DEVEL_MODE && Fault("Bad call, i='$i'\n");
return 1;
}
my $seqno = $parent_seqno_to_go[$i];
return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
return;
} ## end sub is_in_block_by_i
sub is_in_list_by_i {
my ( $self, $i ) = @_;
# returns true if token at i is contained in a LIST
# returns false otherwise
my $seqno = $parent_seqno_to_go[$i];
return if ( !$seqno );
return if ( $seqno eq SEQ_ROOT );
if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
return 1;
}
return;
} ## end sub is_in_list_by_i
sub is_list_by_K {
# Return true if token K is in a list
my ( $self, $KK ) = @_;
my $parent_seqno = $self->parent_seqno_by_K($KK);
return unless defined($parent_seqno);
return $self->[_ris_list_by_seqno_]->{$parent_seqno};
} ## end sub is_list_by_K
sub is_list_by_seqno {
# Return true if the immediate contents of a container appears to be a
# list.
my ( $self, $seqno ) = @_;
return unless defined($seqno);
return $self->[_ris_list_by_seqno_]->{$seqno};
} ## end sub is_list_by_seqno
sub resync_lines_and_tokens {
my $self = shift;
# Re-construct the arrays of tokens associated with the original input
# lines since they have probably changed due to inserting and deleting
# blanks and a few other tokens.
# Return parameters:
# set severe_error = true if processing needs to terminate
my $severe_error;
my $rqw_lines = [];
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $rlines = $self->[_rlines_];
my @Krange_code_without_comments;
my @Klast_valign_code;
# This is the next token and its line index:
my $Knext = 0;
my $Kmax = defined($Klimit) ? $Klimit : -1;
# Verify that old line indexes are in still order. If this error occurs,
# check locations where sub 'respace_tokens' creates new tokens (like
# blank spaces). It must have set a bad old line index.
if ( DEVEL_MODE && defined($Klimit) ) {
my $iline = $rLL->[0]->[_LINE_INDEX_];
foreach my $KK ( 1 .. $Klimit ) {
my $iline_last = $iline;
$iline = $rLL->[$KK]->[_LINE_INDEX_];
if ( $iline < $iline_last ) {
my $KK_m = $KK - 1;
my $token_m = $rLL->[$KK_m]->[_TOKEN_];
my $token = $rLL->[$KK]->[_TOKEN_];
my $type_m = $rLL->[$KK_m]->[_TYPE_];
my $type = $rLL->[$KK]->[_TYPE_];
Fault(<<EOM);
Line indexes out of order at index K=$KK:
at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
at KK =$KK: old line=$iline, type='$type', token='$token',
EOM
}
}
}
my $iline = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
my $line_type = $line_of_tokens->{_line_type};
if ( $line_type eq 'CODE' ) {
# Get the old number of tokens on this line
my $rK_range_old = $line_of_tokens->{_rK_range};
my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
my $Kdiff_old = 0;
if ( defined($Kfirst_old) ) {
$Kdiff_old = $Klast_old - $Kfirst_old;
}
# Find the range of NEW K indexes for the line:
# $Kfirst = index of first token on line
# $Klast = index of last token on line
my ( $Kfirst, $Klast );
my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
# Optimization: Although the actual K indexes may be completely
# changed after respacing, the number of tokens on any given line
# will often be nearly unchanged. So we will see if we can start
# our search by guessing that the new line has the same number
# of tokens as the old line.
my $Knext_guess = $Knext + $Kdiff_old;
if ( $Knext_guess > $Knext
&& $Knext_guess < $Kmax
&& $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
{
# the guess is good, so we can start our search here
$Knext = $Knext_guess + 1;
}
while ($Knext <= $Kmax
&& $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
{
$Knext++;
}
if ( $Knext > $Knext_beg ) {
$Klast = $Knext - 1;
# Delete any terminal blank token
if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
if ( $Klast < $Knext_beg ) {
$Klast = undef;
}
else {
$Kfirst = $Knext_beg;
# Save ranges of non-comment code. This will be used by
# sub keep_old_line_breaks.
if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
push @Krange_code_without_comments, [ $Kfirst, $Klast ];
}
# Only save ending K indexes of code types which are blank
# or 'VER'. These will be used for a convergence check.
# See related code in sub 'convey_batch_to_vertical_aligner'
my $CODE_type = $line_of_tokens->{_code_type};
if ( !$CODE_type
|| $CODE_type eq 'VER' )
{
push @Klast_valign_code, $Klast;
}
}
}
# It is only safe to trim the actual line text if the input
# line had a terminal blank token. Otherwise, we may be
# in a quote.
if ( $line_of_tokens->{_ended_in_blank_token} ) {
$line_of_tokens->{_line_text} =~ s/\s+$//;
}
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
# Deleting semicolons can create new empty code lines
# which should be marked as blank
if ( !defined($Kfirst) ) {
my $CODE_type = $line_of_tokens->{_code_type};
if ( !$CODE_type ) {
$line_of_tokens->{_code_type} = 'BL';
}
}
else {
#---------------------------------------------------
# save indexes of all lines with a 'q' at either end
# for later use by sub find_multiline_qw
#---------------------------------------------------
if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
|| $rLL->[$Klast]->[_TYPE_] eq 'q' )
{
push @{$rqw_lines}, $iline;
}
}
}
}
# There shouldn't be any nodes beyond the last one. This routine is
# relinking lines and tokens after the tokens have been respaced. A fault
# here indicates some kind of bug has been introduced into the above loops.
# There is not good way to keep going; we better stop here.
if ( $Knext <= $Kmax ) {
Fault_Warn(
"unexpected tokens at end of file when reconstructing lines");
$severe_error = 1;
return ( $severe_error, $rqw_lines );
}
$self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
# Setup the convergence test in the FileWriter based on line-ending indexes
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->setup_convergence_test( \@Klast_valign_code );
return ( $severe_error, $rqw_lines );
} ## end sub resync_lines_and_tokens
sub check_for_old_break {
my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
# This sub is called to help implement flags:
# --keep-old-breakpoints-before and --keep-old-breakpoints-after
# Given:
# $KK = index of a token,
# $rkeep_break_hash = user control for --keep-old-...
# $rbreak_hash = hash of tokens where breaks are requested
# Set $rbreak_hash as follows if a user break is requested:
# = 1 make a hard break (flush the current batch)
# best for something like leading commas (-kbb=',')
# = 2 make a soft break (keep building current batch)
# best for something like leading ->
my $rLL = $self->[_rLL_];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
# non-container tokens use the type as the key
if ( !$seqno ) {
my $type = $rLL->[$KK]->[_TYPE_];
if ( $rkeep_break_hash->{$type} ) {
$rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
}
}
# container tokens use the token as the key
else {
my $token = $rLL->[$KK]->[_TOKEN_];
my $flag = $rkeep_break_hash->{$token};
if ($flag) {
my $match = $flag eq '1' || $flag eq '*';
# check for special matching codes
if ( !$match ) {
if ( $token eq '(' || $token eq ')' ) {
$match = $self->match_paren_control_flag( $seqno, $flag );
}
elsif ( $token eq '{' || $token eq '}' ) {
# These tentative codes 'b' and 'B' for brace types are
# placeholders for possible future brace types. They
# are not documented and may be changed.
my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno};
if ( $flag eq 'b' ) { $match = $block_type }
elsif ( $flag eq 'B' ) { $match = !$block_type }
else {
# unknown code - no match
}
}
else {
## ok: none of the above
}
}
if ($match) {
my $type = $rLL->[$KK]->[_TYPE_];
$rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
}
}
}
return;
} ## end sub check_for_old_break
sub keep_old_line_breaks {
# Called once per file to find and mark any old line breaks which
# should be kept. We will be translating the input hashes into
# token indexes.
# A flag is set as follows:
# = 1 make a hard break (flush the current batch)
# best for something like leading commas (-kbb=',')
# = 2 make a soft break (keep building current batch)
# best for something like leading ->
my ($self) = @_;
my $rLL = $self->[_rLL_];
my $rKrange_code_without_comments =
$self->[_rKrange_code_without_comments_];
my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
my $rbreak_container = $self->[_rbreak_container_];
#----------------------------------------
# Apply --break-at-old-method-breakpoints
#----------------------------------------
# This code moved here from sub break_lists to fix b1120
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
foreach my $item ( @{$rKrange_code_without_comments} ) {
my ( $Kfirst, $Klast ) = @{$item};
my $type = $rLL->[$Kfirst]->[_TYPE_];
my $token = $rLL->[$Kfirst]->[_TOKEN_];
# leading '->' use a value of 2 which causes a soft
# break rather than a hard break
if ( $type eq '->' ) {
$rbreak_before_Kfirst->{$Kfirst} = 2;
}
# leading ')->' use a special flag to insure that both
# opening and closing parens get opened
# Fix for b1120: only for parens, not braces
elsif ( $token eq ')' ) {
my $Kn = $self->K_next_nonblank($Kfirst);
next if ( !defined($Kn) );
next if ( $Kn > $Klast );
next if ( $rLL->[$Kn]->[_TYPE_] ne '->' );
my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
next if ( !$seqno );
# Note: in previous versions there was a fix here to avoid
# instability between conflicting -bom and -pvt or -pvtc flags.
# The fix skipped -bom for a small line difference. But this
# was troublesome, and instead the fix has been moved to
# sub set_vertical_tightness_flags where priority is given to
# the -bom flag over -pvt and -pvtc flags. Both opening and
# closing paren flags are involved because even though -bom only
# requests breaking before the closing paren, automated logic
# opens the opening paren when the closing paren opens.
# Relevant cases are b977, b1215, b1270, b1303
$rbreak_container->{$seqno} = 1;
}
else {
## ok: not a special case
}
}
}
#---------------------------------------------------------------------
# Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after
#---------------------------------------------------------------------
return unless ( %keep_break_before_type || %keep_break_after_type );
foreach my $item ( @{$rKrange_code_without_comments} ) {
my ( $Kfirst, $Klast ) = @{$item};
$self->check_for_old_break( $Kfirst, \%keep_break_before_type,
$rbreak_before_Kfirst );
$self->check_for_old_break( $Klast, \%keep_break_after_type,
$rbreak_after_Klast );
}
return;
} ## end sub keep_old_line_breaks
sub weld_containers {
# Called once per file to do any welding operations requested by --weld*
# flags.
my ($self) = @_;
# This count is used to eliminate needless calls for weld checks elsewhere
$total_weld_count = 0;
return if ( $rOpts->{'indent-only'} );
return unless ($rOpts_add_newlines);
# Important: sub 'weld_cuddled_blocks' must be called before
# sub 'weld_nested_containers'. This is because the cuddled option needs to
# use the original _LEVEL_ values of containers, but the weld nested
# containers changes _LEVEL_ of welded containers.
# Here is a good test case to be sure that both cuddling and welding
# are working and not interfering with each other: <<snippets/ce_wn1.in>>
# perltidy -wn -ce
# if ($BOLD_MATH) { (
# $labels, $comment,
# join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
# ) } else { (
# &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
# $after
# ) }
$self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
if ( $rOpts->{'weld-nested-containers'} ) {
$self->weld_nested_containers();
$self->weld_nested_quotes();
}
#-------------------------------------------------------------
# All welding is done. Finish setting up weld data structures.
#-------------------------------------------------------------
my $rLL = $self->[_rLL_];
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
my @K_multi_weld;
my @keys = keys %{$rK_weld_right};
$total_weld_count = @keys;
# First pass to process binary welds.
# This loop is processed in unsorted order for efficiency.
foreach my $Kstart (@keys) {
my $Kend = $rK_weld_right->{$Kstart};
# An error here would be due to an incorrect initialization introduced
# in one of the above weld routines, like sub weld_nested.
if ( $Kend <= $Kstart ) {
Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
if (DEVEL_MODE);
next;
}
# Set weld values for all tokens this welded pair
foreach ( $Kstart + 1 .. $Kend ) {
$rK_weld_left->{$_} = $Kstart;
}
foreach my $Kx ( $Kstart .. $Kend - 1 ) {
$rK_weld_right->{$Kx} = $Kend;
$rweld_len_right_at_K->{$Kx} =
$rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
$rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
}
# Remember the leftmost index of welds which continue to the right
if ( defined( $rK_weld_right->{$Kend} )
&& !defined( $rK_weld_left->{$Kstart} ) )
{
push @K_multi_weld, $Kstart;
}
}
# Second pass to process chains of welds (these are rare).
# This has to be processed in sorted order.
if (@K_multi_weld) {
my $Kend = -1;
foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
# Skip any interior K which was originally missing a left link
next if ( $Kstart <= $Kend );
# Find the end of this chain
$Kend = $rK_weld_right->{$Kstart};
my $Knext = $rK_weld_right->{$Kend};
while ( defined($Knext) ) {
$Kend = $Knext;
$Knext = $rK_weld_right->{$Kend};
}
# Set weld values this chain
foreach ( $Kstart + 1 .. $Kend ) {
$rK_weld_left->{$_} = $Kstart;
}
foreach my $Kx ( $Kstart .. $Kend - 1 ) {
$rK_weld_right->{$Kx} = $Kend;
$rweld_len_right_at_K->{$Kx} =
$rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
$rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
}
}
}
return;
} ## end sub weld_containers
sub cumulative_length_before_K {
my ( $self, $KK ) = @_;
# Returns the cumulative character length from the first token to
# token before the token at index $KK.
my $rLL = $self->[_rLL_];
return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
}
sub weld_cuddled_blocks {
my ($self) = @_;
# Called once per file to handle cuddled formatting
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# This routine implements the -cb flag by finding the appropriate
# closing and opening block braces and welding them together.
return unless ( %{$rcuddled_block_types} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rbreak_container = $self->[_rbreak_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
my $K_closing_container = $self->[_K_closing_container_];
# A stack to remember open chains at all levels: This is a hash rather than
# an array for safety because negative levels can occur in files with
# errors. This allows us to keep processing with negative levels.
# $in_chain{$level} = [$chain_type, $type_sequence];
my %in_chain;
my $CBO = $rOpts->{'cuddled-break-option'};
# loop over structure items to find cuddled pairs
my $level = 0;
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $type_sequence not defined at K=$KK")
if (DEVEL_MODE);
next;
}
# NOTE: we must use the original levels here. They can get changed
# by sub 'weld_nested_containers', so this routine must be called
# before sub 'weld_nested_containers'.
my $last_level = $level;
$level = $rtoken_vars->[_LEVEL_];
if ( $level < $last_level ) { $in_chain{$last_level} = undef }
elsif ( $level > $last_level ) { $in_chain{$level} = undef }
else {
## ok - ($level == $last_level)
}
# We are only looking at code blocks
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
if ( $token eq '{' ) {
my $block_type = $rblock_type_of_seqno->{$type_sequence};
if ( !$block_type ) {
# patch for unrecognized block types which may not be labeled
my $Kp = $self->K_previous_nonblank($KK);
while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
$Kp = $self->K_previous_nonblank($Kp);
}
next unless $Kp;
$block_type = $rLL->[$Kp]->[_TOKEN_];
}
if ( $in_chain{$level} ) {
# we are in a chain and are at an opening block brace.
# See if we are welding this opening brace with the previous
# block brace. Get their identification numbers:
my $closing_seqno = $in_chain{$level}->[1];
my $opening_seqno = $type_sequence;
# The preceding block must be on multiple lines so that its
# closing brace will start a new line.
if ( !$ris_broken_container->{$closing_seqno}
&& !$rbreak_container->{$closing_seqno} )
{
next unless ( $CBO == 2 );
$rbreak_container->{$closing_seqno} = 1;
}
# We can weld the closing brace to its following word ..
my $Ko = $K_closing_container->{$closing_seqno};
my $Kon;
if ( defined($Ko) ) {
$Kon = $self->K_next_nonblank($Ko);
}
# ..unless it is a comment
if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
# OK to weld these two tokens...
$rK_weld_right->{$Ko} = $Kon;
$rK_weld_left->{$Kon} = $Ko;
# Set flag that we want to break the next container
# so that the cuddled line is balanced.
$rbreak_container->{$opening_seqno} = 1
if ($CBO);
# Remember which braces are cuddled.
# The closing brace is used to set adjusted indentations.
# The opening brace is not yet used but might eventually
# be needed in setting adjusted indentation.
$ris_cuddled_closing_brace->{$closing_seqno} = 1;
}
}
else {
# We are not in a chain. Start a new chain if we see the
# starting block type.
if ( $rcuddled_block_types->{$block_type} ) {
$in_chain{$level} = [ $block_type, $type_sequence ];
}
else {
$block_type = '*';
$in_chain{$level} = [ $block_type, $type_sequence ];
}
}
}
elsif ( $token eq '}' ) {
if ( $in_chain{$level} ) {
# We are in a chain at a closing brace. See if this chain
# continues..
my $Knn = $self->K_next_code($KK);
next unless $Knn;
my $chain_type = $in_chain{$level}->[0];
my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
if (
$rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
)
{
# Note that we do not weld yet because we must wait until
# we we are sure that an opening brace for this follows.
$in_chain{$level}->[1] = $type_sequence;
}
else { $in_chain{$level} = undef }
}
}
else {
## ok - not a curly brace
}
}
return;
} ## end sub weld_cuddled_blocks
sub find_nested_pairs {
my $self = shift;
# This routine is called once per file to do preliminary work needed for
# the --weld-nested option. This information is also needed for adding
# semicolons.
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# We define an array of pairs of nested containers
my @nested_pairs;
# Names of calling routines can either be marked as 'i' or 'w',
# and they may invoke a sub call with an '->'. We will consider
# any consecutive string of such types as a single unit when making
# weld decisions. We also allow a leading !
my $is_name_type = {
'i' => 1,
'w' => 1,
'U' => 1,
'->' => 1,
'!' => 1,
};
# Loop over all closing container tokens
foreach my $inner_seqno ( keys %{$K_closing_container} ) {
my $K_inner_closing = $K_closing_container->{$inner_seqno};
# See if it is immediately followed by another, outer closing token
my $K_outer_closing = $K_inner_closing + 1;
$K_outer_closing += 1
if ( $K_outer_closing < $Num
&& $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
next if ( $K_outer_closing >= $Num );
my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
next if ( !$outer_seqno );
my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
next if ( !$is_closing_token{$token_outer_closing} );
# Simple filter: No commas or semicolons in the outer container
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
if ($rtype_count) {
next if ( $rtype_count->{','} || $rtype_count->{';'} );
}
# Now we have to check the opening tokens.
my $K_outer_opening = $K_opening_container->{$outer_seqno};
my $K_inner_opening = $K_opening_container->{$inner_seqno};
next if ( !defined($K_outer_opening) );
next if ( !defined($K_inner_opening) );
my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
# Verify that the inner opening token is the next container after the
# outer opening token.
my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
next unless defined($K_io_check);
if ( $K_io_check != $K_inner_opening ) {
# The inner opening container does not immediately follow the outer
# opening container, but we may still allow a weld if they are
# separated by a sub signature. For example, we may have something
# like this, where $K_io_check may be at the first 'x' instead of
# 'io'. So we need to hop over the signature and see if we arrive
# at 'io'.
# oo io
# | x x |
# $obj->then( sub ( $code ) {
# ...
# return $c->render(text => '', status => $code);
# } );
# | |
# ic oc
next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
next unless defined($seqno_signature);
my $K_signature_closing = $K_closing_container->{$seqno_signature};
next unless defined($K_signature_closing);
my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
next
unless ( defined($K_test) && $K_test == $K_inner_opening );
# OK, we have arrived at 'io' in the above diagram. We should put
# a limit on the length or complexity of the signature here. There
# is no perfect way to do this, one way is to put a limit on token
# count. For consistency with older versions, we should allow a
# signature with a single variable to weld, but not with
# multiple variables. A single variable as in 'sub ($code) {' can
# have a $Kdiff of 2 to 4, depending on spacing.
# But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
# 7, depending on spacing. So to keep formatting consistent with
# previous versions, we will also avoid welding if there is a comma
# in the signature.
my $Kdiff = $K_signature_closing - $K_io_check;
next if ( $Kdiff > 4 );
# backup comma count test; but we cannot get here with Kdiff<=4
my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
next if ( $rtc && $rtc->{','} );
}
# Yes .. this is a possible nesting pair.
# They can be separated by a small amount.
my $K_diff = $K_inner_opening - $K_outer_opening;
# Count the number of nonblank characters separating them.
# Note: the $nonblank_count includes the inner opening container
# but not the outer opening container, so it will be >= 1.
if ( $K_diff < 0 ) { next } # Shouldn't happen
my $nonblank_count = 0;
my $type;
my $is_name;
# Here is an example of a long identifier chain which counts as a
# single nonblank here (this spans about 10 K indexes):
# if ( !Boucherot::SetOfConnections->new->handler->execute(
# ^--K_o_o ^--K_i_o
# @array) )
my $Kn_first = $K_outer_opening;
my $Kn_last_nonblank;
my $saw_comment;
foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
if ( !$nonblank_count ) { $Kn_first = $Kn }
if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
$Kn_last_nonblank = $Kn;
# skip chain of identifier tokens
my $last_type = $type;
my $last_is_name = $is_name;
$type = $rLL->[$Kn]->[_TYPE_];
if ( $type eq '#' ) { $saw_comment = 1; last }
$is_name = $is_name_type->{$type};
next if ( $is_name && $last_is_name );
# do not count a possible leading - of bareword hash key
next if ( $type eq 'm' && !$last_type );
$nonblank_count++;
last if ( $nonblank_count > 2 );
}
# Do not weld across a comment .. fix for c058.
next if ($saw_comment);
# Patch for b1104: do not weld to a paren preceded by sort/map/grep
# because the special line break rules may cause a blinking state
if ( defined($Kn_last_nonblank)
&& $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
&& $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
{
my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
# Turn off welding at sort/map/grep (
if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
}
my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
if (
# 1: adjacent opening containers, like: do {{
$nonblank_count == 1
# 2. anonymous sub + prototype or sig: )->then( sub ($code) {
# ... but it seems best not to stack two structural blocks, like
# this
# sub make_anon_with_my_sub { sub {
# because it probably hides the structure a little too much.
|| ( $inner_blocktype
&& $inner_blocktype eq 'sub'
&& $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
&& !$outer_blocktype )
# 3. short item following opening paren, like: fun( yyy (
|| $nonblank_count == 2 && $token_oo eq '('
# 4. weld around fat commas, if requested (git #108), such as
# elf->call_method( method_name_foo => {
|| ( $type eq '=>'
&& $nonblank_count <= 3
&& %weld_fat_comma_rules
&& $weld_fat_comma_rules{$token_oo} )
)
{
push @nested_pairs,
[ $inner_seqno, $outer_seqno, $K_inner_closing ];
}
next;
}
# The weld routine expects the pairs in order in the form
# [$seqno_inner, $seqno_outer]
# And they must be in the same order as the inner closing tokens
# (otherwise, welds of three or more adjacent tokens will not work). The K
# value of this inner closing token has temporarily been stored for
# sorting.
@nested_pairs =
# Drop the K index after sorting (it would cause trouble downstream)
map { [ $_->[0], $_->[1] ] }
# Sort on the K values
sort { $a->[2] <=> $b->[2] } @nested_pairs;
return \@nested_pairs;
} ## end sub find_nested_pairs
sub match_paren_control_flag {
# Decide if this paren is excluded by user request:
# undef matches no parens
# '*' matches all parens
# 'k' matches only if the previous nonblank token is a perl builtin
# keyword (such as 'if', 'while'),
# 'K' matches if 'k' does not, meaning if the previous token is not a
# keyword.
# 'f' matches if the previous token is a function other than a keyword.
# 'F' matches if 'f' does not.
# 'w' matches if either 'k' or 'f' match.
# 'W' matches if 'w' does not.
my ( $self, $seqno, $flag, $rLL ) = @_;
# Input parameters:
# $seqno = sequence number of the container (should be paren)
# $flag = the flag which defines what matches
# $rLL = an optional alternate token list needed for respace operations
$rLL = $self->[_rLL_] unless ( defined($rLL) );
return 0 unless ( defined($flag) );
return 0 if $flag eq '0';
return 1 if $flag eq '1';
return 1 if $flag eq '*';
return 0 unless ($seqno);
my $K_opening = $self->[_K_opening_container_]->{$seqno};
return unless ( defined($K_opening) );
my ( $is_f, $is_k, $is_w );
my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
if ( defined($Kp) ) {
my $type_p = $rLL->[$Kp]->[_TYPE_];
# keyword?
$is_k = $type_p eq 'k';
# function call?
$is_f = $self->[_ris_function_call_paren_]->{$seqno};
# either keyword or function call?
$is_w = $is_k || $is_f;
}
my $match;
if ( $flag eq 'k' ) { $match = $is_k }
elsif ( $flag eq 'K' ) { $match = !$is_k }
elsif ( $flag eq 'f' ) { $match = $is_f }
elsif ( $flag eq 'F' ) { $match = !$is_f }
elsif ( $flag eq 'w' ) { $match = $is_w }
elsif ( $flag eq 'W' ) { $match = !$is_w }
else {
## no match
}
return $match;
} ## end sub match_paren_control_flag
sub is_excluded_weld {
# decide if this weld is excluded by user request
my ( $self, $KK, $is_leading ) = @_;
my $rLL = $self->[_rLL_];
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $rflags = $weld_nested_exclusion_rules{$token};
return 0 unless ( defined($rflags) );
my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
return 0 unless ( defined($flag) );
return 1 if $flag eq '*';
my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
return $self->match_paren_control_flag( $seqno, $flag );
} ## end sub is_excluded_weld
# hashes to simplify welding logic
my %type_ok_after_bareword;
my %has_tight_paren;
BEGIN {
# types needed for welding RULE 6
my @q = qw# => -> { ( [ #;
@type_ok_after_bareword{@q} = (1) x scalar(@q);
# these types do not 'like' to be separated from a following paren
@q = qw(w i q Q G C Z U);
@{has_tight_paren}{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_WELD => 0;
sub setup_new_weld_measurements {
# Define quantities to check for excess line lengths when welded.
# Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
# Given indexes of outer and inner opening containers to be welded:
# $Kouter_opening, $Kinner_opening
# Returns these variables:
# $new_weld_ok = true (new weld ok) or false (do not start new weld)
# $starting_indent = starting indentation
# $starting_lentot = starting cumulative length
# $msg = diagnostic message for debugging
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $starting_level;
my $starting_ci;
my $starting_lentot;
my $maximum_text_length;
my $msg = EMPTY_STRING;
my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
my $rK_range = $rlines->[$iline_oo]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
#-------------------------------------------------------------------------
# We now define a reference index, '$Kref', from which to start measuring
# This choice turns out to be critical for keeping welds stable during
# iterations, so we go through a number of STEPS...
#-------------------------------------------------------------------------
# STEP 1: Our starting guess is to use measure from the first token of the
# current line. This is usually a good guess.
my $Kref = $Kfirst;
# STEP 2: See if we should go back a little farther
my $Kprev = $self->K_previous_nonblank($Kfirst);
if ( defined($Kprev) ) {
# Avoid measuring from between an opening paren and a previous token
# which should stay close to it ... fixes b1185
my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
my $type_prev = $rLL->[$Kprev]->[_TYPE_];
if ( $Kouter_opening == $Kfirst
&& $token_oo eq '('
&& $has_tight_paren{$type_prev} )
{
$Kref = $Kprev;
}
# Back up and count length from a token like '=' or '=>' if -lp
# is used (this fixes b520)
# ...or if a break is wanted before there
elsif ($rOpts_line_up_parentheses
|| $want_break_before{$type_prev} )
{
# If there are other sequence items between the start of this line
# and the opening token in question, then do not include tokens on
# the previous line in length calculations. This check added to
# fix case b1174 which had a '?' on the line
my $no_previous_seq_item = $Kref == $Kouter_opening
|| $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
if ( $no_previous_seq_item
&& substr( $type_prev, 0, 1 ) eq '=' )
{
$Kref = $Kprev;
# Fix for b1144 and b1112: backup to the first nonblank
# character before the =>, or to the start of its line.
if ( $type_prev eq '=>' ) {
my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
$Kref = $KK;
last;
}
}
}
}
else {
## ok
}
}
# STEP 3: Now look ahead for a ternary and, if found, use it.
# This fixes case b1182.
# Also look for a ')' at the same level and, if found, use it.
# This fixes case b1224.
if ( $Kref < $Kouter_opening ) {
my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
while ( $Knext < $Kouter_opening ) {
if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
|| $rLL->[$Knext]->[_TOKEN_] eq ')' )
{
$Kref = $Knext;
last;
}
}
$Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
}
}
# Define the starting measurements we will need
$starting_lentot =
$Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
$starting_level = $rLL->[$Kref]->[_LEVEL_];
$starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
$maximum_text_length = $maximum_text_length_at_level[$starting_level] -
$starting_ci * $rOpts_continuation_indentation;
# STEP 4: Switch to using the outer opening token as the reference
# point if a line break before it would make a longer line.
# Fixes case b1055 and is also an alternate fix for b1065.
my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
if ( $Kref < $Kouter_opening ) {
my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
my $maximum_text_length_oo =
$maximum_text_length_at_level[$starting_level_oo] -
$starting_ci_oo * $rOpts_continuation_indentation;
# The excess length to any cumulative length K = lenK is either
# $excess = $lenk - ($lentot + $maximum_text_length), or
# $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
# so the worst case (maximum excess) corresponds to the configuration
# with minimum value of the sum: $lentot + $maximum_text_length
if ( $lentot_oo + $maximum_text_length_oo <
$starting_lentot + $maximum_text_length )
{
$Kref = $Kouter_opening;
$starting_level = $starting_level_oo;
$starting_ci = $starting_ci_oo;
$starting_lentot = $lentot_oo;
$maximum_text_length = $maximum_text_length_oo;
}
}
my $new_weld_ok = 1;
# STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
# combination -wn -lp -dws -naws does not work well and can cause blinkers.
# It will probably only occur in stress testing. For this situation we
# will only start a new weld if we start at a 'good' location.
# - Added 'if' to fix case b1032.
# - Require blank before certain previous characters to fix b1111.
# - Add ';' to fix case b1139
# - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
# - relaxed constraints for b1227
# - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
# - added skip if type is 'Q' for b1447
if ( $starting_ci
&& $rOpts_line_up_parentheses
&& $rOpts_delete_old_whitespace
&& !$rOpts_add_whitespace
&& $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
&& $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q'
&& defined($Kprev) )
{
my $type_first = $rLL->[$Kfirst]->[_TYPE_];
my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
my $type_prev = $rLL->[$Kprev]->[_TYPE_];
my $type_pp = 'b';
if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
my $is_good_location =
$type_prev =~ /^[\,\.\;]/
|| ( $type_prev =~ /^[=\{\[\(\L]/
&& ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' ) )
|| $type_first =~ /^[=\,\.\;\{\[\(\L]/
|| $type_first eq '||'
|| (
$type_first eq 'k'
&& ( $token_first eq 'if'
|| $token_first eq 'or' )
);
if ( !$is_good_location ) {
$msg =
"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
$new_weld_ok = 0;
}
}
return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
} ## end sub setup_new_weld_measurements
sub excess_line_length_for_Krange {
my ( $self, $Kfirst, $Klast ) = @_;
# returns $excess_length =
# by how many characters a line composed of tokens $Kfirst .. $Klast will
# exceed the allowed line length
my $rLL = $self->[_rLL_];
my $length_before_Kfirst =
$Kfirst <= 0
? 0
: $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
# backup before a side comment if necessary
my $Kend = $Klast;
if ( $rOpts_ignore_side_comment_lengths
&& $rLL->[$Klast]->[_TYPE_] eq '#' )
{
my $Kprev = $self->K_previous_nonblank($Klast);
if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
}
# get the length of the text
my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
# get the size of the text window
my $level = $rLL->[$Kfirst]->[_LEVEL_];
my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
my $max_text_length = $maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
my $excess_length = $length - $max_text_length;
DEBUG_WELD
&& print
"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
return ($excess_length);
} ## end sub excess_line_length_for_Krange
sub weld_nested_containers {
my ($self) = @_;
# Called once per file for option '--weld-nested-containers'
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
# This routine implements the -wn flag by "welding together"
# the nested closing and opening tokens which were previously
# identified by sub 'find_nested_pairs'. "welding" simply
# involves setting certain hash values which will be checked
# later during formatting.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
# Find nested pairs of container tokens for any welding.
my $rnested_pairs = $self->find_nested_pairs();
# Return unless there are nested pairs to weld
return unless defined($rnested_pairs) && @{$rnested_pairs};
# NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
# pairs. But it isn't clear if this is possible because we don't know
# which sequences might actually start a weld.
my $rOpts_break_at_old_method_breakpoints =
$rOpts->{'break-at-old-method-breakpoints'};
# This array will hold the sequence numbers of the tokens to be welded.
my @welds;
# Variables needed for estimating line lengths
my $maximum_text_length; # maximum spaces available for text
my $starting_lentot; # cumulative text to start of current line
my $iline_outer_opening = -1;
my $weld_count_this_start = 0;
my $weld_starts_in_block = 0;
# OLD: $single_line_tol added to fix cases b1180 b1181
# = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
# NEW: $single_line_tol=0 fixes b1212; and b1180-1181 work ok now
# =1 for -vmll and -lp; fixes b1452, b1453, b1454
# NOTE: the combination -vmll and -lp can be unstable, especially when
# also combined with -wn. It may eventually be necessary to turn off -vmll
# if -lp is set. For now, this works. The value '1' is a minimum which
# works but can be increased if necessary.
my $single_line_tol =
$rOpts_variable_maximum_line_length && $rOpts_line_up_parentheses
? 1
: 0;
my $multiline_tol = $single_line_tol + 1 +
max( $rOpts_indent_columns, $rOpts_continuation_indentation );
# Define a welding cutoff level: do not start a weld if the inside
# container level equals or exceeds this level.
# We use the minimum of two criteria, either of which may be more
# restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
# the 'beta' value is more restrictive in other cases (b1243).
# Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
# my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
# This is now '$high_stress_level'.
# The vertical tightness flags can throw off line length calculations.
# This patch was added to fix instability issue b1284.
# It works to always use a tol of 1 for 1 line block length tests, but
# this restricted value keeps test case wn6.wn working as before.
# It may be necessary to include '[' and '{' here in the future.
my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
# Abbreviations:
# _oo=outer opening, i.e. first of { {
# _io=inner opening, i.e. second of { {
# _oc=outer closing, i.e. second of } {
# _ic=inner closing, i.e. first of } }
my $previous_pair;
# Main loop over nested pairs...
# We are working from outermost to innermost pairs so that
# level changes will be complete when we arrive at the inner pairs.
while ( my $item = pop( @{$rnested_pairs} ) ) {
my ( $inner_seqno, $outer_seqno ) = @{$item};
my $Kouter_opening = $K_opening_container->{$outer_seqno};
my $Kinner_opening = $K_opening_container->{$inner_seqno};
my $Kouter_closing = $K_closing_container->{$outer_seqno};
my $Kinner_closing = $K_closing_container->{$inner_seqno};
# RULE: do not weld if inner container has <= 3 tokens unless the next
# token is a heredoc (so we know there will be multiple lines)
if ( $Kinner_closing - $Kinner_opening <= 4 ) {
my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
next unless defined($Knext_nonblank);
my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
next unless ( $type eq 'h' );
}
my $outer_opening = $rLL->[$Kouter_opening];
my $inner_opening = $rLL->[$Kinner_opening];
my $outer_closing = $rLL->[$Kouter_closing];
my $inner_closing = $rLL->[$Kinner_closing];
# RULE: do not weld to a hash brace. The reason is that it has a very
# strong bond strength to the next token, so a line break after it
# may not work. Previously we allowed welding to something like @{
# but that caused blinking states (cases b751, b779).
if ( $inner_opening->[_TYPE_] eq 'L' ) {
next;
}
# RULE: do not weld to a square bracket which does not contain commas
if ( $inner_opening->[_TYPE_] eq '[' ) {
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
next unless ( $rtype_count && $rtype_count->{','} );
# Do not weld if there is text before a '[' such as here:
# curr_opt ( @beg [2,5] )
# It will not break into the desired sandwich structure.
# This fixes case b109, 110.
my $Kdiff = $Kinner_opening - $Kouter_opening;
next if ( $Kdiff > 2 );
next
if ( $Kdiff == 2
&& $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
}
# RULE: Avoid welding under stress. The idea is that we need to have a
# little space* within a welded container to avoid instability. Note
# that after each weld the level values are reduced, so long multiple
# welds can still be made. This rule will seldom be a limiting factor
# in actual working code. Fixes b1206, b1243.
my $inner_level = $inner_opening->[_LEVEL_];
if ( $inner_level >= $high_stress_level ) { next }
# Set flag saying if this pair starts a new weld
my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
# Set flag saying if this pair is adjacent to the previous nesting pair
# (even if previous pair was rejected as a weld)
my $touch_previous_pair =
defined($previous_pair) && $outer_seqno == $previous_pair->[0];
$previous_pair = $item;
my $do_not_weld_rule = 0;
my $Msg = EMPTY_STRING;
my $is_one_line_weld;
my $iline_oo = $outer_opening->[_LINE_INDEX_];
my $iline_io = $inner_opening->[_LINE_INDEX_];
my $iline_ic = $inner_closing->[_LINE_INDEX_];
my $iline_oc = $outer_closing->[_LINE_INDEX_];
my $token_oo = $outer_opening->[_TOKEN_];
my $token_io = $inner_opening->[_TOKEN_];
# DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
# Added for case b973. Moved here from below to fix b1423.
if ( !$do_not_weld_rule
&& $rOpts_break_at_old_method_breakpoints
&& $iline_io > $iline_oo )
{
foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
my $rK_range = $rlines->[$iline]->{_rK_range};
next unless defined($rK_range);
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless defined($Kfirst);
if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
$do_not_weld_rule = 7;
last;
}
}
}
next if ($do_not_weld_rule);
# Turn off vertical tightness at possible one-line welds. Fixes b1402,
# b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
# b1340, b1341, b1342, b1343, which previously used a separate fix.
# Issue c161 is the latest and simplest check, using
# $iline_ic==$iline_io as the test.
if ( %opening_vertical_tightness
&& $iline_ic == $iline_io
&& $opening_vertical_tightness{$token_oo} )
{
$rmax_vertical_tightness->{$outer_seqno} = 0;
}
my $is_multiline_weld =
$iline_oo == $iline_io
&& $iline_ic == $iline_oc
&& $iline_io != $iline_ic;
if (DEBUG_WELD) {
my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
$Msg .= <<EOM;
Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
tokens '$token_oo' .. '$token_io'
EOM
}
# DO-NOT-WELD RULE 0:
# Avoid a new paren-paren weld if inner parens are 'sheared' (separated
# by one line). This can produce instabilities (fixes b1250 b1251
# 1256).
if ( !$is_multiline_weld
&& $iline_ic == $iline_io + 1
&& $token_oo eq '('
&& $token_io eq '(' )
{
if (DEBUG_WELD) {
$Msg .= "RULE 0: Not welding due to sheared inner parens\n";
print {*STDOUT} $Msg;
}
next;
}
# If this pair is not adjacent to the previous pair (skipped or not),
# then measure lengths from the start of line of oo.
if (
!$touch_previous_pair
# Also do this if restarting at a new line; fixes case b965, s001
|| ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
)
{
# Remember the line we are using as a reference
$iline_outer_opening = $iline_oo;
$weld_count_this_start = 0;
$weld_starts_in_block = 0;
( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
= $self->setup_new_weld_measurements( $Kouter_opening,
$Kinner_opening );
if (
!$new_weld_ok
&& ( $iline_oo != $iline_io
|| $iline_ic != $iline_oc )
)
{
if (DEBUG_WELD) { print {*STDOUT} $msg }
next;
}
my $rK_range = $rlines->[$iline_oo]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
# An existing one-line weld is a line in which
# (1) the containers are all on one line, and
# (2) the line does not exceed the allowable length
if ( $iline_oo == $iline_oc ) {
# All the tokens are on one line, now check their length.
# Start with the full line index range. We will reduce this
# in the coding below in some cases.
my $Kstart = $Kfirst;
my $Kstop = $Klast;
# Note that the following minimal choice for measuring will
# work and will not cause any instabilities because it is
# invariant:
## my $Kstart = $Kouter_opening;
## my $Kstop = $Kouter_closing;
# But that can lead to some undesirable welds. So a little
# more complicated method has been developed.
# We are trying to avoid creating bad two-line welds when we are
# working on long, previously un-welded input text, such as
# INPUT (example of a long input line weld candidate):
## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
# GOOD two-line break: (not welded; result marked too long):
## $mutation->transpos(
## $self->RNA->position($mutation->label, $atg_label));
# BAD two-line break: (welded; result if we weld):
## $mutation->transpos($self->RNA->position(
## $mutation->label, $atg_label));
# We can only get an approximate estimate of the final length,
# since the line breaks may change, and for -lp mode because
# even the indentation is not yet known.
my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
my $level_last = $rLL->[$Klast]->[_LEVEL_];
my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
# - measure to the end of the original line if balanced
# - measure to the closing container if unbalanced (fixes b1230)
#if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
# - measure from the start of the original line if balanced
# - measure from the most previous token with same level
# if unbalanced (b1232)
if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
$Kstart = $Kouter_opening;
foreach
my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
{
next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
$Kstart = $KK;
}
}
my $excess =
$self->excess_line_length_for_Krange( $Kstart, $Kstop );
# Coding simplified here for case b1219.
# Increased tol from 0 to 1 when pvt>0 to fix b1284.
$is_one_line_weld = $excess <= $one_line_tol;
}
# DO-NOT-WELD RULE 1:
# Do not weld something that looks like the start of a two-line
# function call, like this: <<snippets/wn6.in>>
# $trans->add_transformation(
# PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
# We will look for a semicolon after the closing paren.
# We want to weld something complex, like this though
# my $compass = uc( opposite_direction( line_to_canvas_direction(
# @{ $coords[0] }, @{ $coords[1] } ) ) );
# Otherwise we will get a 'blinker'. For example, the following
# would become a blinker without this rule:
# $Self->_Add( $SortOrderDisplay{ $Field
# ->GenerateFieldForSelectSQL() } );
# But it is okay to weld a two-line statement if it looks like
# it was already welded, meaning that the two opening containers are
# on a different line that the two closing containers. This is
# necessary to prevent blinking of something like this with
# perltidy -wn -pbp (starting indentation two levels deep):
# $top_label->set_text( gettext(
# "Unable to create personal directory - check permissions.") );
if ( $iline_oc == $iline_oo + 1
&& $iline_io == $iline_ic
&& $token_oo eq '(' )
{
# Look for following semicolon...
my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
my $next_nonblank_type =
defined($Knext_nonblank)
? $rLL->[$Knext_nonblank]->[_TYPE_]
: 'b';
if ( $next_nonblank_type eq ';' ) {
# Then do not weld if no other containers between inner
# opening and closing.
my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
if ( $Knext_seq_item == $Kinner_closing ) {
$do_not_weld_rule = 1;
}
}
}
} ## end starting new weld sequence
else {
# set the 1-line flag if continuing a weld sequence; fixes b1239
$is_one_line_weld = ( $iline_oo == $iline_oc );
}
# DO-NOT-WELD RULE 2:
# Do not weld an opening paren to an inner one line brace block
# We will just use old line numbers for this test and require
# iterations if necessary for convergence
# For example, otherwise we could cause the opening paren
# in the following example to separate from the caller name
# as here:
# $_[0]->code_handler
# ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
# Here is another example where we do not want to weld:
# $wrapped->add_around_modifier(
# sub { push @tracelog => 'around 1'; $_[0]->(); } );
# If the one line sub block gets broken due to length or by the
# user, then we can weld. The result will then be:
# $wrapped->add_around_modifier( sub {
# push @tracelog => 'around 1';
# $_[0]->();
# } );
# Updated to fix cases b1082 b1102 b1106 b1115:
# Also, do not weld to an intact inner block if the outer opening token
# is on a different line. For example, this prevents oscillation
# between these two states in case b1106:
# return map{
# ($_,[$self->$_(@_[1..$#_])])
# }@every;
# return map { (
# $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
# ) } @every;
# The effect of this change on typical code is very minimal. Sometimes
# it may take a second iteration to converge, but this gives protection
# against blinking.
if ( !$do_not_weld_rule
&& !$is_one_line_weld
&& $iline_ic == $iline_io )
{
$do_not_weld_rule = 2
if ( $token_oo eq '(' || $iline_oo != $iline_io );
}
# DO-NOT-WELD RULE 2A:
# Do not weld an opening asub brace in -lp mode if -asbl is set. This
# helps avoid instabilities in one-line block formation, and fixes
# b1241. Previously, the '$is_one_line_weld' flag was tested here
# instead of -asbl, and this fixed most cases. But it turns out that
# the real problem was the -asbl flag, and switching to this was
# necessary to fixe b1268. This also fixes b1269, b1277, b1278.
if ( !$do_not_weld_rule
&& $rOpts_line_up_parentheses
&& $rOpts_asbl
&& $ris_asub_block->{$outer_seqno} )
{
$do_not_weld_rule = '2A';
}
# DO-NOT-WELD RULE 3:
# Do not weld if this makes our line too long.
# Use a tolerance which depends on if the old tokens were welded
# (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
if ( !$do_not_weld_rule ) {
# Measure to a little beyond the inner opening token if it is
# followed by a bare word, which may have unusual line break rules.
# NOTE: Originally this was OLD RULE 6: do not weld to a container
# which is followed on the same line by an unknown bareword token.
# This can cause blinkers (cases b626, b611). But OK to weld one
# line welds to fix cases b1057 b1064. For generality, OLD RULE 6
# has been merged into RULE 3 here to also fix cases b1078 b1091.
my $K_for_length = $Kinner_opening;
my $Knext_io = $self->K_next_nonblank($Kinner_opening);
next unless ( defined($Knext_io) ); # shouldn't happen
my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
# Note: may need to eventually also include other types here,
# such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
if ( $type_io_next eq 'w' ) {
my $Knext_io2 = $self->K_next_nonblank($Knext_io);
next unless ( defined($Knext_io2) );
my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
if ( !$type_ok_after_bareword{$type_io_next2} ) {
$K_for_length = $Knext_io2;
}
}
# Use a tolerance for welds over multiple lines to avoid blinkers.
# We can use zero tolerance if it looks like we are working on an
# existing weld.
my $tol =
$is_one_line_weld || $is_multiline_weld
? $single_line_tol
: $multiline_tol;
# By how many characters does this exceed the text window?
my $excess =
$self->cumulative_length_before_K($K_for_length) -
$starting_lentot + 1 + $tol -
$maximum_text_length;
# Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
# b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
# Revised patch: New tolerance definition allows going back to '> 0'
# here. This fixes case b1124. See also cases b1087 and b1087a.
if ( $excess > 0 ) { $do_not_weld_rule = 3 }
if (DEBUG_WELD) {
$Msg .=
"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
}
}
# DO-NOT-WELD RULE 4; implemented for git#10:
# Do not weld an opening -ce brace if the next container is on a single
# line, different from the opening brace. (This is very rare). For
# example, given the following with -ce, we will avoid joining the {
# and [
# } else {
# [ $_, length($_) ]
# }
# because this would produce a terminal one-line block:
# } else { [ $_, length($_) ] }
# which may not be what is desired. But given this input:
# } else { [ $_, length($_) ] }
# then we will do the weld and retain the one-line block
if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
my $block_type = $rblock_type_of_seqno->{$outer_seqno};
if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
my $io_line = $inner_opening->[_LINE_INDEX_];
my $ic_line = $inner_closing->[_LINE_INDEX_];
my $oo_line = $outer_opening->[_LINE_INDEX_];
if ( $oo_line < $io_line && $ic_line == $io_line ) {
$do_not_weld_rule = 4;
}
}
}
# DO-NOT-WELD RULE 5: do not include welds excluded by user
if (
!$do_not_weld_rule
&& %weld_nested_exclusion_rules
&& ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
|| $self->is_excluded_weld( $Kinner_opening, 0 ) )
)
{
$do_not_weld_rule = 5;
}
# DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
if ($do_not_weld_rule) {
# After neglecting a pair, we start measuring from start of point
# io ... but not if previous type does not like to be separated
# from its container (fixes case b1184)
my $Kprev = $self->K_previous_nonblank($Kinner_opening);
my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
if ( !$has_tight_paren{$type_prev} ) {
my $starting_level = $inner_opening->[_LEVEL_];
my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
$starting_lentot =
$self->cumulative_length_before_K($Kinner_opening);
$maximum_text_length =
$maximum_text_length_at_level[$starting_level] -
$starting_ci_level * $rOpts_continuation_indentation;
}
if (DEBUG_WELD) {
$Msg .= "Not welding due to RULE $do_not_weld_rule\n";
print {*STDOUT} $Msg;
}
# Normally, a broken pair should not decrease indentation of
# intermediate tokens:
## if ( $last_pair_broken ) { next }
# However, for long strings of welded tokens, such as '{{{{{{...'
# we will allow broken pairs to also remove indentation.
# This will keep very long strings of opening and closing
# braces from marching off to the right. We will do this if the
# number of tokens in a weld before the broken weld is 4 or more.
# This rule will mainly be needed for test scripts, since typical
# welds have fewer than about 4 welded tokens.
if ( !@welds || @{ $welds[-1] } < 4 ) { next }
}
# otherwise start new weld ...
elsif ($starting_new_weld) {
$weld_count_this_start++;
if (DEBUG_WELD) {
$Msg .= "Starting new weld\n";
print {*STDOUT} $Msg;
}
push @welds, $item;
my $parent_seqno = $self->parent_seqno_by_K($Kouter_closing);
$weld_starts_in_block = $parent_seqno == SEQ_ROOT
|| $rblock_type_of_seqno->{$parent_seqno};
$rK_weld_right->{$Kouter_opening} = $Kinner_opening;
$rK_weld_left->{$Kinner_opening} = $Kouter_opening;
$rK_weld_right->{$Kinner_closing} = $Kouter_closing;
$rK_weld_left->{$Kouter_closing} = $Kinner_closing;
}
# ... or extend current weld
else {
$weld_count_this_start++;
if (DEBUG_WELD) {
$Msg .= "Extending current weld\n";
print {*STDOUT} $Msg;
}
unshift @{ $welds[-1] }, $inner_seqno;
$rK_weld_right->{$Kouter_opening} = $Kinner_opening;
$rK_weld_left->{$Kinner_opening} = $Kouter_opening;
$rK_weld_right->{$Kinner_closing} = $Kouter_closing;
$rK_weld_left->{$Kouter_closing} = $Kinner_closing;
# Keep a broken container broken at multiple welds. This might
# also be useful for simple welds, but for now it is restricted
# to multiple welds to minimize changes to existing coding. This
# fixes b1429, b1430. Updated for issue c198: but allow a
# line differences of 1 (simple shear) so that a simple shear
# can remain or become a single line.
if ( $iline_ic - $iline_io > 1 ) {
# Only set this break if it is the last possible weld in this
# chain. This will keep some extreme test cases unchanged.
my $is_chain_end = !@{$rnested_pairs}
|| $rnested_pairs->[-1]->[1] != $inner_seqno;
if ($is_chain_end) {
$self->[_rbreak_container_]->{$inner_seqno} = 1;
}
}
}
# After welding, reduce the indentation level if all intermediate tokens
my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
if ( $dlevel != 0 ) {
my $Kstart = $Kinner_opening;
my $Kstop = $Kinner_closing;
foreach my $KK ( $Kstart .. $Kstop ) {
$rLL->[$KK]->[_LEVEL_] += $dlevel;
}
# Copy opening ci level to help break at = for -lp mode (case b1124)
$rLL->[$Kinner_opening]->[_CI_LEVEL_] =
$rLL->[$Kouter_opening]->[_CI_LEVEL_];
# But only copy the closing ci level if the outer container is
# in a block; otherwise poor results can be produced.
if ($weld_starts_in_block) {
$rLL->[$Kinner_closing]->[_CI_LEVEL_] =
$rLL->[$Kouter_closing]->[_CI_LEVEL_];
}
}
}
return;
} ## end sub weld_nested_containers
sub weld_nested_quotes {
# Called once per file for option '--weld-nested-containers'. This
# does welding on qw quotes.
my $self = shift;
# See if quotes are excluded from welding
my $rflags = $weld_nested_exclusion_rules{'q'};
return if ( defined($rflags) && defined( $rflags->[1] ) );
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rlines = $self->[_rlines_];
my $starting_lentot;
my $maximum_text_length;
my $is_single_quote = sub {
my ( $Kbeg, $Kend, $quote_type ) = @_;
foreach my $K ( $Kbeg .. $Kend ) {
my $test_type = $rLL->[$K]->[_TYPE_];
next if ( $test_type eq 'b' );
return if ( $test_type ne $quote_type );
}
return 1;
};
# Length tolerance - same as previously used for sub weld_nested
my $multiline_tol =
1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
# look for single qw quotes nested in containers
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$outer_seqno ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $outer_seqno not defined at K=$KK")
if (DEVEL_MODE);
next;
}
my $token = $rtoken_vars->[_TOKEN_];
if ( $is_opening_token{$token} ) {
# see if the next token is a quote of some type
my $Kn = $KK + 1;
$Kn += 1
if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
next if ( $Kn >= $Num );
my $next_token = $rLL->[$Kn]->[_TOKEN_];
my $next_type = $rLL->[$Kn]->[_TYPE_];
next
unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
&& substr( $next_token, 0, 1 ) eq 'q' );
# The token before the closing container must also be a quote
my $Kouter_closing = $K_closing_container->{$outer_seqno};
my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
# This is an inner opening container
my $Kinner_opening = $Kn;
# Do not weld to single-line quotes. Nothing is gained, and it may
# look bad.
next if ( $Kinner_closing == $Kinner_opening );
# Only weld to quotes delimited with container tokens. This is
# because welding to arbitrary quote delimiters can produce code
# which is less readable than without welding.
my $closing_delimiter =
substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
next
unless ( $is_closing_token{$closing_delimiter}
|| $closing_delimiter eq '>' );
# Now make sure that there is just a single quote in the container
next
unless (
$is_single_quote->(
$Kinner_opening + 1,
$Kinner_closing - 1,
$next_type
)
);
# OK: This is a candidate for welding
my $Msg = EMPTY_STRING;
my $do_not_weld;
my $Kouter_opening = $K_opening_container->{$outer_seqno};
my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
my $is_old_weld =
( $iline_oo == $iline_io && $iline_ic == $iline_oc );
# Fix for case b1189. If quote is marked as type 'Q' then only weld
# if the two closing tokens are on the same input line. Otherwise,
# the closing line will be output earlier in the pipeline than
# other CODE lines and welding will not actually occur. This will
# leave a half-welded structure with potential formatting
# instability. This might be fixed by adding a check for a weld on
# a closing Q token and sending it down the normal channel, but it
# would complicate the code and is potentially risky.
next
if (!$is_old_weld
&& $next_type eq 'Q'
&& $iline_ic != $iline_oc );
# If welded, the line must not exceed allowed line length
( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
= $self->setup_new_weld_measurements( $Kouter_opening,
$Kinner_opening );
if ( !$ok_to_weld ) {
if (DEBUG_WELD) { print {*STDOUT} $msg }
next;
}
my $length =
$rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
my $excess = $length + $multiline_tol - $maximum_text_length;
my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
if ( $excess >= $excess_max ) {
$do_not_weld = 1;
}
if (DEBUG_WELD) {
if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
$Msg .=
"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
}
# Check weld exclusion rules for outer container
if ( !$do_not_weld ) {
my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
if (DEBUG_WELD) {
$Msg .=
"No qw weld due to weld exclusion rules for outer container\n";
}
$do_not_weld = 1;
}
}
# Check the length of the last line (fixes case b1039)
if ( !$do_not_weld ) {
my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
my $excess_ic =
$self->excess_line_length_for_Krange( $Kfirst_ic,
$Kouter_closing );
# Allow extra space for additional welded closing container(s)
# and a space and comma or semicolon.
# NOTE: weld len has not been computed yet. Use 2 spaces
# for now, correct for a single weld. This estimate could
# be made more accurate if necessary.
my $weld_len =
defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
if ( $excess_ic + $weld_len + 2 > 0 ) {
if (DEBUG_WELD) {
$Msg .=
"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
}
$do_not_weld = 1;
}
}
if ($do_not_weld) {
if (DEBUG_WELD) {
$Msg .= "Not Welding QW\n";
print {*STDOUT} $Msg;
}
next;
}
# OK to weld
if (DEBUG_WELD) {
$Msg .= "Welding QW\n";
print {*STDOUT} $Msg;
}
$rK_weld_right->{$Kouter_opening} = $Kinner_opening;
$rK_weld_left->{$Kinner_opening} = $Kouter_opening;
$rK_weld_right->{$Kinner_closing} = $Kouter_closing;
$rK_weld_left->{$Kouter_closing} = $Kinner_closing;
# Undo one indentation level if an extra level was added to this
# multiline quote
my $qw_seqno =
$self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
if ( $qw_seqno
&& $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
{
foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
$rLL->[$K]->[_LEVEL_] -= 1;
}
$rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
$rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
}
# undo CI for other welded quotes
else {
foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
$rLL->[$K]->[_CI_LEVEL_] = 0;
}
}
# Change the level of a closing qw token to be that of the outer
# containing token. This will allow -lp indentation to function
# correctly in the vertical aligner.
# Patch to fix c002: but not if it contains text
if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
$rLL->[$Kinner_closing]->[_LEVEL_] =
$rLL->[$Kouter_closing]->[_LEVEL_];
}
}
}
return;
} ## end sub weld_nested_quotes
sub is_welded_at_seqno {
my ( $self, $seqno ) = @_;
# given a sequence number:
# return true if it is welded either left or right
# return false otherwise
return unless ( $total_weld_count && defined($seqno) );
my $KK_o = $self->[_K_opening_container_]->{$seqno};
return unless defined($KK_o);
return defined( $self->[_rK_weld_left_]->{$KK_o} )
|| defined( $self->[_rK_weld_right_]->{$KK_o} );
} ## end sub is_welded_at_seqno
sub mark_short_nested_blocks {
# This routine looks at the entire file and marks any short nested blocks
# which should not be broken. The results are stored in the hash
# $rshort_nested->{$type_sequence}
# which will be true if the container should remain intact.
#
# For example, consider the following line:
# sub cxt_two { sort { $a <=> $b } test_if_list() }
# The 'sort' block is short and nested within an outer sub block.
# Normally, the existence of the 'sort' block will force the sub block to
# break open, but this is not always desirable. Here we will set a flag for
# the sort block to prevent this. To give the user control, we will
# follow the input file formatting. If either of the blocks is broken in
# the input file then we will allow it to remain broken. Otherwise we will
# set a flag to keep it together in later formatting steps.
# The flag which is set here will be checked in two places:
# 'sub process_line_of_CODE' and 'sub starting_one_line_block'
my $self = shift;
return if $rOpts->{'indent-only'};
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
return unless ( $rOpts->{'one-line-block-nesting'} );
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rbreak_container = $self->[_rbreak_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# Variables needed for estimating line lengths
my $maximum_text_length;
my $starting_lentot;
my $length_tol = 1;
my $excess_length_to_K = sub {
my ($K) = @_;
# Estimate the length from the line start to a given token
my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
my $excess_length = $length + $length_tol - $maximum_text_length;
return ($excess_length);
};
# loop over all containers
my @open_block_stack;
my $iline = -1;
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $type_sequence not defined at K=$KK")
if (DEVEL_MODE);
next;
}
# Patch: do not mark short blocks with welds.
# In some cases blinkers can form (case b690).
if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
next;
}
# We are just looking at code blocks
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
next unless ( $rblock_type_of_seqno->{$type_sequence} );
# Keep a stack of all acceptable block braces seen.
# Only consider blocks entirely on one line so dump the stack when line
# changes.
my $iline_last = $iline;
$iline = $rLL->[$KK]->[_LINE_INDEX_];
if ( $iline != $iline_last ) { @open_block_stack = () }
if ( $token eq '}' ) {
if (@open_block_stack) { pop @open_block_stack }
}
next unless ( $token eq '{' );
# block must be balanced (bad scripts may be unbalanced)
my $K_opening = $K_opening_container->{$type_sequence};
my $K_closing = $K_closing_container->{$type_sequence};
next unless ( defined($K_opening) && defined($K_closing) );
# require that this block be entirely on one line
next
if ( $ris_broken_container->{$type_sequence}
|| $rbreak_container->{$type_sequence} );
# See if this block fits on one line of allowed length (which may
# be different from the input script)
$starting_lentot =
$KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
my $level = $rLL->[$KK]->[_LEVEL_];
my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
$maximum_text_length =
$maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
# Dump the stack if block is too long and skip this block
if ( $excess_length_to_K->($K_closing) > 0 ) {
@open_block_stack = ();
next;
}
# OK, Block passes tests, remember it
push @open_block_stack, $type_sequence;
# We are only marking nested code blocks,
# so check for a previous block on the stack
next if ( @open_block_stack <= 1 );
# Looks OK, mark this as a short nested block
$rshort_nested->{$type_sequence} = 1;
}
return;
} ## end sub mark_short_nested_blocks
sub special_indentation_adjustments {
my ($self) = @_;
# Called once per file to define the levels to be used for computing
# actual indentation. These levels are initialized to be the structural
# levels and then are adjusted if necessary for special purposes.
# The adjustments are made either by changing _CI_LEVEL_ directly or
# by setting modified levels in the array $self->[_radjusted_levels_].
# NOTE: This routine is called after the weld routines, which may have
# already adjusted the initial values of _LEVEL_, so we are making
# adjustments on top of those levels. It would be nicer to have the
# weld routines also use this adjustment, but that gets complicated
# when we combine -gnu -wn and also have some welded quotes.
my $Klimit = $self->[_Klimit_];
my $rLL = $self->[_rLL_];
my $radjusted_levels = $self->[_radjusted_levels_];
return unless ( defined($Klimit) );
# Initialize the adjusted levels to be the structural levels
foreach my $KK ( 0 .. $Klimit ) {
$radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
}
# First set adjusted levels for any non-indenting braces.
$self->do_non_indenting_braces();
# Adjust breaks and indentation list containers
$self->break_before_list_opening_containers();
# Set adjusted levels for the whitespace cycle option.
$self->whitespace_cycle_adjustment();
$self->braces_left_setup();
# Adjust continuation indentation if -bli is set
$self->bli_adjustment();
$self->extended_ci()
if ($rOpts_extended_continuation_indentation);
# Now clip any adjusted levels to be non-negative
$self->clip_adjusted_levels();
return;
} ## end sub special_indentation_adjustments
sub clip_adjusted_levels {
# Replace any negative adjusted levels with zero.
# Negative levels can occur in files with brace errors.
my ($self) = @_;
my $radjusted_levels = $self->[_radjusted_levels_];
return unless defined($radjusted_levels) && @{$radjusted_levels};
my $min = min( @{$radjusted_levels} ); # fast check for min
if ( $min < 0 ) {
# slow loop, but rarely needed
foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
}
return;
} ## end sub clip_adjusted_levels
sub do_non_indenting_braces {
# Called once per file to handle the --non-indenting-braces parameter.
# Remove indentation within marked braces if requested
my ($self) = @_;
# Any non-indenting braces have been found by sub find_non_indenting_braces
# and are defined by the following hash:
my $rseqno_non_indenting_brace_by_ix =
$self->[_rseqno_non_indenting_brace_by_ix_];
return unless ( %{$rseqno_non_indenting_brace_by_ix} );
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
my $radjusted_levels = $self->[_radjusted_levels_];
# First locate all of the marked blocks
my @K_stack;
foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
my $KK = $K_opening_container->{$seqno};
my $line_of_tokens = $rlines->[$ix];
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
$rspecial_side_comment_type->{$Klast} = 'NIB';
push @K_stack, [ $KK, 1 ];
my $Kc = $K_closing_container->{$seqno};
push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
}
return unless (@K_stack);
@K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
# Then loop to remove indentation within marked blocks
my $KK_last = 0;
my $ndeep = 0;
foreach my $item (@K_stack) {
my ( $KK, $inc ) = @{$item};
if ( $ndeep > 0 ) {
foreach ( $KK_last + 1 .. $KK ) {
$radjusted_levels->[$_] -= $ndeep;
}
# We just subtracted the old $ndeep value, which only applies to a
# '{'. The new $ndeep applies to a '}', so we undo the error.
if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
}
$ndeep += $inc;
$KK_last = $KK;
}
return;
} ## end sub do_non_indenting_braces
sub whitespace_cycle_adjustment {
my $self = shift;
# Called once per file to implement the --whitespace-cycle option
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $radjusted_levels = $self->[_radjusted_levels_];
my $maximum_level = $self->[_maximum_level_];
if ( $rOpts_whitespace_cycle
&& $rOpts_whitespace_cycle > 0
&& $rOpts_whitespace_cycle < $maximum_level )
{
my $Kmax = @{$rLL} - 1;
my $whitespace_last_level = -1;
my @whitespace_level_stack = ();
my $last_nonblank_type = 'b';
my $last_nonblank_token = EMPTY_STRING;
foreach my $KK ( 0 .. $Kmax ) {
my $level_abs = $radjusted_levels->[$KK];
my $level = $level_abs;
if ( $level_abs < $whitespace_last_level ) {
pop(@whitespace_level_stack);
}
if ( !@whitespace_level_stack ) {
push @whitespace_level_stack, $level_abs;
}
else {
if ( $level_abs > $whitespace_last_level ) {
$level = $whitespace_level_stack[-1] +
( $level_abs - $whitespace_last_level );
if (
# 1 Try to break at a block brace
(
$level > $rOpts_whitespace_cycle
&& $last_nonblank_type eq '{'
&& $last_nonblank_token eq '{'
)
# 2 Then either a brace or bracket
|| ( $level > $rOpts_whitespace_cycle + 1
&& $last_nonblank_token =~ /^[\{\[]$/ )
# 3 Then a paren too
|| $level > $rOpts_whitespace_cycle + 2
)
{
$level = 1;
}
push @whitespace_level_stack, $level;
}
}
$level = $whitespace_level_stack[-1];
$radjusted_levels->[$KK] = $level;
$whitespace_last_level = $level_abs;
my $type = $rLL->[$KK]->[_TYPE_];
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
}
}
return;
} ## end sub whitespace_cycle_adjustment
use constant DEBUG_BBX => 0;
sub break_before_list_opening_containers {
my ($self) = @_;
# This routine is called once per batch to implement parameters
# --break-before-hash-brace=n and similar -bbx=n flags
# and their associated indentation flags:
# --break-before-hash-brace-and-indent and similar -bbxi=n
# Nothing to do if none of the -bbx=n parameters has been set
return unless %break_before_container_types;
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
# Loop over all opening container tokens
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $ris_permanently_broken = $self->[_ris_permanently_broken_];
my $rhas_list = $self->[_rhas_list_];
my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
my $radjusted_levels = $self->[_radjusted_levels_];
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
my $rlines = $self->[_rlines_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $length_tol =
max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
if ($rOpts_ignore_old_breakpoints) {
# Patch suggested by b1231; the old tol was excessive.
## $length_tol += $rOpts_maximum_line_length;
$length_tol *= 2;
}
my $rbreak_before_container_by_seqno = {};
my $rwant_reduced_ci = {};
foreach my $seqno ( keys %{$K_opening_container} ) {
#----------------------------------------------------------------
# Part 1: Examine any -bbx=n flags
#----------------------------------------------------------------
next if ( $rblock_type_of_seqno->{$seqno} );
my $KK = $K_opening_container->{$seqno};
# This must be a list or contain a list.
# Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
# Note2: 'has_list' holds the depth to the sub-list. We will require
# a depth of just 1
my $is_list = $self->is_list_by_seqno($seqno);
my $has_list = $rhas_list->{$seqno};
# Fix for b1173: if welded opening container, use flag of innermost
# seqno. Otherwise, the restriction $has_list==1 prevents triple and
# higher welds from following the -BBX parameters.
if ($total_weld_count) {
my $KK_test = $rK_weld_right->{$KK};
if ( defined($KK_test) ) {
my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
$is_list ||= $self->is_list_by_seqno($seqno_inner);
$has_list = $rhas_list->{$seqno_inner};
}
}
next unless ( $is_list || $has_list && $has_list == 1 );
my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
# Only for types of container tokens with a non-default break option
my $token = $rLL->[$KK]->[_TOKEN_];
my $break_option = $break_before_container_types{$token};
next unless ($break_option);
# Do not use -bbx under stress for stability ... fixes b1300
# TODO: review this; do we also need to look at stress_level_lalpha?
my $level = $rLL->[$KK]->[_LEVEL_];
if ( $level >= $stress_level_beta ) {
DEBUG_BBX
&& print
"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
next;
}
# Require previous nonblank to be '=' or '=>'
my $Kprev = $KK - 1;
next if ( $Kprev < 0 );
my $prev_type = $rLL->[$Kprev]->[_TYPE_];
if ( $prev_type eq 'b' ) {
$Kprev--;
next if ( $Kprev < 0 );
$prev_type = $rLL->[$Kprev]->[_TYPE_];
}
next unless ( $is_equal_or_fat_comma{$prev_type} );
my $ci = $rLL->[$KK]->[_CI_LEVEL_];
#--------------------------------------------
# New coding for option 2 (break if complex).
#--------------------------------------------
# This new coding uses clues which are invariant under formatting to
# decide if a list is complex. For now it is only applied when -lp
# and -vmll are used, but eventually it may become the standard method.
# Fixes b1274, b1275, and others, including b1099.
if ( $break_option == 2 ) {
if ( $rOpts_line_up_parentheses
|| $rOpts_variable_maximum_line_length )
{
# Start with the basic definition of a complex list...
my $is_complex = $is_list && $has_list;
# and it is also complex if the parent is a list
if ( !$is_complex ) {
my $parent = $rparent_of_seqno->{$seqno};
if ( $self->is_list_by_seqno($parent) ) {
$is_complex = 1;
}
}
# finally, we will call it complex if there are inner opening
# and closing container tokens, not parens, within the outer
# container tokens.
if ( !$is_complex ) {
my $Kp = $self->K_next_nonblank($KK);
my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
my $Kc = $K_closing_container->{$seqno};
my $Km = $self->K_previous_nonblank($Kc);
my $token_m =
defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
# ignore any optional ending comma
if ( $token_m eq ',' ) {
$Km = $self->K_previous_nonblank($Km);
$token_m =
defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
}
$is_complex ||=
$is_closing_token{$token_m} && $token_m ne ')';
}
}
# Convert to option 3 (always break) if complex
next unless ($is_complex);
$break_option = 3;
}
}
# Fix for b1231: the has_list_with_lec does not cover all cases.
# A broken container containing a list and with line-ending commas
# will stay broken, so can be treated as if it had a list with lec.
$has_list_with_lec ||=
$has_list
&& $ris_broken_container->{$seqno}
&& $rlec_count_by_seqno->{$seqno};
DEBUG_BBX
&& print {*STDOUT}
"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
# -bbx=1 = stable, try to follow input
if ( $break_option == 1 ) {
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
my $rK_range = $rlines->[$iline]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless ( $KK == $Kfirst );
}
# -bbx=2 => apply this style only for a 'complex' list
elsif ( $break_option == 2 ) {
# break if this list contains a broken list with line-ending comma
my $ok_to_break;
my $Msg = EMPTY_STRING;
if ($has_list_with_lec) {
$ok_to_break = 1;
DEBUG_BBX && do { $Msg = "has list with lec;" };
}
if ( !$ok_to_break ) {
# Turn off -xci if -bbx=2 and this container has a sublist but
# not a broken sublist. This avoids creating blinkers. The
# problem is that -xci can cause one-line lists to break open,
# and thereby creating formatting instability.
# This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
# b1045 b1046 b1047 b1051 b1052 b1061.
if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
my $parent = $rparent_of_seqno->{$seqno};
if ( $self->is_list_by_seqno($parent) ) {
DEBUG_BBX && do { $Msg = "parent is list" };
$ok_to_break = 1;
}
}
if ( !$ok_to_break ) {
DEBUG_BBX
&& print {*STDOUT} "Not breaking at seqno=$seqno: $Msg\n";
next;
}
DEBUG_BBX
&& print {*STDOUT} "OK to break at seqno=$seqno: $Msg\n";
# Patch: turn off -xci if -bbx=2 and -lp
# This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
$rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
}
# -bbx=3 = always break
elsif ( $break_option == 3 ) {
# ok to break
}
# Shouldn't happen! Bad flag, but make behavior same as 3
else {
# ok to break
}
# Set a flag for actual implementation later in
# sub insert_breaks_before_list_opening_containers
$rbreak_before_container_by_seqno->{$seqno} = 1;
DEBUG_BBX
&& print {*STDOUT} "BBX: ok to break at seqno=$seqno\n";
# -bbxi=0: Nothing more to do if the ci value remains unchanged
my $ci_flag = $container_indentation_options{$token};
next unless ($ci_flag);
# -bbxi=1: This option removes ci and is handled in
# later sub get_final_indentation
if ( $ci_flag == 1 ) {
$rwant_reduced_ci->{$seqno} = 1;
next;
}
# -bbxi=2: This option changes the level ...
# This option can conflict with -xci in some cases. We can turn off
# -xci for this container to avoid blinking. For now, only do this if
# -vmll is set. ( fixes b1335, b1336 )
if ($rOpts_variable_maximum_line_length) {
$rno_xci_by_seqno->{$seqno} = 1;
}
#----------------------------------------------------------------
# Part 2: Perform tests before committing to changing ci and level
#----------------------------------------------------------------
# Before changing the ci level of the opening container, we need
# to be sure that the container will be broken in the later stages of
# formatting. We have to do this because we are working early in the
# formatting pipeline. A problem can occur if we change the ci or
# level of the opening token but do not actually break the container
# open as expected. In most cases it wouldn't make any difference if
# we changed ci or not, but there are some edge cases where this
# can cause blinking states, so we need to try to only change ci if
# the container will really be broken.
# Only consider containers already broken
next if ( !$ris_broken_container->{$seqno} );
# Patch to fix issue b1305: the combination of -naws and ci>i appears
# to cause an instability. It should almost never occur in practice.
next
if (!$rOpts_add_whitespace
&& $rOpts_continuation_indentation > $rOpts_indent_columns );
# Always ok to change ci for permanently broken containers
if ( $ris_permanently_broken->{$seqno} ) { }
# Always OK if this list contains a broken sub-container with
# a non-terminal line-ending comma
elsif ($has_list_with_lec) { }
# Otherwise, we are considering a single container...
else {
# A single container must have at least 1 line-ending comma:
next unless ( $rlec_count_by_seqno->{$seqno} );
my $OK;
# Since it has a line-ending comma, it will stay broken if the
# -boc flag is set
if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
# OK if the container contains multiple fat commas
# Better: multiple lines with fat commas
if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
my $rtype_count = $rtype_count_by_seqno->{$seqno};
next unless ($rtype_count);
my $fat_comma_count = $rtype_count->{'=>'};
DEBUG_BBX
&& print {*STDOUT} "BBX: fat comma count=$fat_comma_count\n";
if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
}
# The last check we can make is to see if this container could
# fit on a single line. Use the least possible indentation
# estimate, ci=0, so we are not subtracting $ci *
# $rOpts_continuation_indentation from tabulated
# $maximum_text_length value.
if ( !$OK ) {
my $maximum_text_length = $maximum_text_length_at_level[$level];
my $K_closing = $K_closing_container->{$seqno};
my $length = $self->cumulative_length_before_K($K_closing) -
$self->cumulative_length_before_K($KK);
my $excess_length = $length - $maximum_text_length;
DEBUG_BBX
&& print {*STDOUT}
"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
# OK if the net container definitely breaks on length
if ( $excess_length > $length_tol ) {
$OK = 1;
DEBUG_BBX
&& print {*STDOUT} "BBX: excess_length=$excess_length\n";
}
# Otherwise skip it
else { next }
}
}
#------------------------------------------------------------
# Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
#------------------------------------------------------------
DEBUG_BBX && print {*STDOUT} "BBX: OK to break\n";
# -bbhbi=n
# -bbsbi=n
# -bbpi=n
# where:
# n=0 default indentation (usually one ci)
# n=1 outdent one ci
# n=2 indent one level (minus one ci)
# n=3 indent one extra ci [This may be dropped]
# NOTE: We are adjusting indentation of the opening container. The
# closing container will normally follow the indentation of the opening
# container automatically, so this is not currently done.
next unless ($ci);
# option 1: outdent
if ( $ci_flag == 1 ) {
$ci -= 1;
}
# option 2: indent one level
elsif ( $ci_flag == 2 ) {
$ci -= 1;
$radjusted_levels->[$KK] += 1;
}
# unknown option
else {
# Shouldn't happen - leave ci unchanged
}
$rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
}
$self->[_rbreak_before_container_by_seqno_] =
$rbreak_before_container_by_seqno;
$self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
return;
} ## end sub break_before_list_opening_containers
use constant DEBUG_XCI => 0;
sub extended_ci {
# This routine implements the -xci (--extended-continuation-indentation)
# flag. We add CI to interior tokens of a container which itself has CI but
# only if a token does not already have CI.
# To do this, we will locate opening tokens which themselves have
# continuation indentation (CI). We track them with their sequence
# numbers. These sequence numbers are called 'controlling sequence
# numbers'. They apply continuation indentation to the tokens that they
# contain. These inner tokens remember their controlling sequence numbers.
# Later, when these inner tokens are output, they have to see if the output
# lines with their controlling tokens were output with CI or not. If not,
# then they must remove their CI too.
# The controlling CI concept works hierarchically. But CI itself is not
# hierarchical; it is either on or off. There are some rare instances where
# it would be best to have hierarchical CI too, but not enough to be worth
# the programming effort.
# The operations to remove unwanted CI are done in sub 'undo_ci'.
my ($self) = @_;
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
my $ris_bli_container = $self->[_ris_bli_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my %available_space;
# Loop over all opening container tokens
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my @seqno_stack;
my $seqno_top;
my $KLAST;
my $KNEXT = $self->[_K_first_seq_item_];
# The following variable can be used to allow a little extra space to
# avoid blinkers. A value $len_tol = 20 fixed the following
# fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
# It turned out that the real problem was mis-parsing a list brace as
# a code block in a 'use' statement when the line length was extremely
# small. A value of 0 works now, but a slightly larger value can
# be used to minimize the chance of a blinker.
my $len_tol = 0;
while ( defined($KNEXT) ) {
# Fix all tokens up to the next sequence item if we are changing CI
if ($seqno_top) {
my $is_list = $ris_list_by_seqno->{$seqno_top};
my $space = $available_space{$seqno_top};
my $count = 0;
foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
next if ( $rLL->[$Kt]->[_CI_LEVEL_] );
# But do not include tokens which might exceed the line length
# and are not in a list.
# ... This fixes case b1031
if ( $is_list
|| $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space
|| $rLL->[$Kt]->[_TYPE_] eq '#' )
{
$rLL->[$Kt]->[_CI_LEVEL_] = 1;
$rseqno_controlling_my_ci->{$Kt} = $seqno_top;
$count++;
}
}
$ris_seqno_controlling_ci->{$seqno_top} += $count;
}
$KLAST = $KNEXT;
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
# see if we have reached the end of the current controlling container
if ( $seqno_top && $seqno == $seqno_top ) {
$seqno_top = pop @seqno_stack;
}
# Patch to fix some block types...
# Certain block types arrive from the tokenizer without CI but should
# have it for this option. These include anonymous subs and
# do sort map grep eval
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type && $is_block_with_ci{$block_type} ) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
if ($seqno_top) {
$rseqno_controlling_my_ci->{$KK} = $seqno_top;
$ris_seqno_controlling_ci->{$seqno_top}++;
}
}
# If this does not have ci, update ci if necessary and continue looking
else {
if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
if ($seqno_top) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
$rseqno_controlling_my_ci->{$KK} = $seqno_top;
$ris_seqno_controlling_ci->{$seqno_top}++;
}
next;
}
}
# We are looking for opening container tokens with ci
my $K_opening = $K_opening_container->{$seqno};
next unless ( defined($K_opening) && $KK == $K_opening );
# Make sure there is a corresponding closing container
# (could be missing if the script has a brace error)
my $K_closing = $K_closing_container->{$seqno};
next unless defined($K_closing);
# Skip if requested by -bbx to avoid blinkers
next if ( $rno_xci_by_seqno->{$seqno} );
# Skip if this is a -bli container (this fixes case b1065) Note: case
# b1065 is also fixed by the update for b1055, so this update is not
# essential now. But there does not seem to be a good reason to add
# xci and bli together, so the update is retained.
next if ( $ris_bli_container->{$seqno} );
# Require different input lines. This will filter out a large number
# of small hash braces and array brackets. If we accidentally filter
# out an important container, it will get fixed on the next pass.
if (
$rLL->[$K_opening]->[_LINE_INDEX_] ==
$rLL->[$K_closing]->[_LINE_INDEX_]
&& ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
$rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
$rOpts_maximum_line_length )
)
{
DEBUG_XCI
&& print "XCI: Skipping seqno=$seqno, require different lines\n";
next;
}
# Do not apply -xci if adding extra ci will put the container contents
# beyond the line length limit (fixes cases b899 b935)
my $level = $rLL->[$K_opening]->[_LEVEL_];
my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
my $maximum_text_length =
$maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
# Fix for b1197 b1198 b1199 b1200 b1201 b1202
# Do not apply -xci if we are running out of space
# TODO: review this; do we also need to look at stress_level_alpha?
if ( $level >= $stress_level_beta ) {
DEBUG_XCI
&& print
"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
next;
}
# remember how much space is available for patch b1031 above
my $space =
$maximum_text_length - $len_tol - $rOpts_continuation_indentation;
if ( $space < 0 ) {
DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
next;
}
DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
$available_space{$seqno} = $space;
# This becomes the next controlling container
push @seqno_stack, $seqno_top if ($seqno_top);
$seqno_top = $seqno;
}
return;
} ## end sub extended_ci
sub braces_left_setup {
# Called once per file to mark all -bl, -sbl, and -asbl containers
my $self = shift;
my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
# We will turn on this hash for braces controlled by these flags:
my $rbrace_left = $self->[_rbrace_left_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $ris_sub_block = $self->[_ris_sub_block_];
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $block_type = $rblock_type_of_seqno->{$seqno};
# use -asbl flag for an anonymous sub block
if ( $ris_asub_block->{$seqno} ) {
if ($rOpts_asbl) {
$rbrace_left->{$seqno} = 1;
}
}
# use -sbl flag for a named sub
elsif ( $ris_sub_block->{$seqno} ) {
if ($rOpts_sbl) {
$rbrace_left->{$seqno} = 1;
}
}
# use -bl flag if not a sub block of any type
else {
if ( $rOpts_bl
&& $block_type =~ /$bl_pattern/
&& $block_type !~ /$bl_exclusion_pattern/ )
{
$rbrace_left->{$seqno} = 1;
}
}
}
return;
} ## end sub braces_left_setup
sub bli_adjustment {
# Called once per file to implement the --brace-left-and-indent option.
# If -bli is set, adds one continuation indentation for certain braces
my $self = shift;
return unless ( $rOpts->{'brace-left-and-indent'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_bli_container = $self->[_ris_bli_container_];
my $rbrace_left = $self->[_rbrace_left_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type
&& $block_type =~ /$bli_pattern/
&& $block_type !~ /$bli_exclusion_pattern/ )
{
$ris_bli_container->{$seqno} = 1;
$rbrace_left->{$seqno} = 1;
my $Ko = $K_opening_container->{$seqno};
my $Kc = $K_closing_container->{$seqno};
if ( defined($Ko) && defined($Kc) ) {
$rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
}
}
}
return;
} ## end sub bli_adjustment
sub find_multiline_qw {
my ( $self, $rqw_lines ) = @_;
# Multiline qw quotes are not sequenced items like containers { [ (
# but behave in some respects in a similar way. So this routine finds them
# and creates a separate sequence number system for later use.
# This is straightforward because they always begin at the end of one line
# and end at the beginning of a later line. This is true no matter how we
# finally make our line breaks, so we can find them before deciding on new
# line breaks.
# Input parameter:
# if $rqw_lines is defined it is a ref to array of all line index numbers
# for which there is a type 'q' qw quote at either end of the line. This
# was defined by sub resync_lines_and_tokens for efficiency.
#
my $rlines = $self->[_rlines_];
# if $rqw_lines is not defined (this will occur with -io option) then we
# will have to scan all lines.
if ( !defined($rqw_lines) ) {
$rqw_lines = [ 0 .. @{$rlines} - 1 ];
}
# if $rqw_lines is defined but empty, just return because there are no
# multiline qw's
else {
if ( !@{$rqw_lines} ) { return }
}
my $rstarting_multiline_qw_seqno_by_K = {};
my $rending_multiline_qw_seqno_by_K = {};
my $rKrange_multiline_qw_by_seqno = {};
my $rmultiline_qw_has_extra_level = {};
my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
my $rLL = $self->[_rLL_];
my $qw_seqno;
my $num_qw_seqno = 0;
my $K_start_multiline_qw;
# For reference, here is the old loop, before $rqw_lines became available:
## foreach my $line_of_tokens ( @{$rlines} ) {
foreach my $iline ( @{$rqw_lines} ) {
my $line_of_tokens = $rlines->[$iline];
# Note that these first checks are required in case we have to scan
# all lines, not just lines with type 'q' at the ends.
my $line_type = $line_of_tokens->{_line_type};
next unless ( $line_type eq 'CODE' );
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
# Continuing a sequence of qw lines ...
if ( defined($K_start_multiline_qw) ) {
my $type = $rLL->[$Kfirst]->[_TYPE_];
# shouldn't happen
if ( $type ne 'q' ) {
DEVEL_MODE && print {*STDERR} <<EOM;
STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
EOM
$K_start_multiline_qw = undef;
next;
}
my $Kprev = $self->K_previous_nonblank($Kfirst);
my $Knext = $self->K_next_nonblank($Kfirst);
my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
if ( $type_m eq 'q' && $type_p ne 'q' ) {
$rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
$rKrange_multiline_qw_by_seqno->{$qw_seqno} =
[ $K_start_multiline_qw, $Kfirst ];
$K_start_multiline_qw = undef;
$qw_seqno = undef;
}
}
# Starting a new a sequence of qw lines ?
if ( !defined($K_start_multiline_qw)
&& $rLL->[$Klast]->[_TYPE_] eq 'q' )
{
my $Kprev = $self->K_previous_nonblank($Klast);
my $Knext = $self->K_next_nonblank($Klast);
my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
if ( $type_m ne 'q' && $type_p eq 'q' ) {
$num_qw_seqno++;
$qw_seqno = 'q' . $num_qw_seqno;
$K_start_multiline_qw = $Klast;
$rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
}
}
}
# Give multiline qw lists extra indentation instead of CI. This option
# works well but is currently only activated when the -xci flag is set.
# The reason is to avoid unexpected changes in formatting.
if ($rOpts_extended_continuation_indentation) {
while ( my ( $qw_seqno_x, $rKrange ) =
each %{$rKrange_multiline_qw_by_seqno} )
{
my ( $Kbeg, $Kend ) = @{$rKrange};
# require isolated closing token
my $token_end = $rLL->[$Kend]->[_TOKEN_];
next
unless ( length($token_end) == 1
&& ( $is_closing_token{$token_end} || $token_end eq '>' ) );
# require isolated opening token
my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
# allow space(s) after the qw
if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
{
$token_beg =~ s/\s+//;
}
next unless ( length($token_beg) == 3 );
foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
$rLL->[$KK]->[_LEVEL_]++;
$rLL->[$KK]->[_CI_LEVEL_] = 0;
}
# set flag for -wn option, which will remove the level
$rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
}
}
# For the -lp option we need to mark all parent containers of
# multiline quotes
if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
while ( my ( $qw_seqno_x, $rKrange ) =
each %{$rKrange_multiline_qw_by_seqno} )
{
my ( $Kbeg, $Kend ) = @{$rKrange};
my $parent_seqno = $self->parent_seqno_by_K($Kend);
next unless ($parent_seqno);
# If the parent container exactly surrounds this qw, then -lp
# formatting seems to work so we will not mark it.
my $is_tightly_contained;
my $Kn = $self->K_next_nonblank($Kend);
my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
my $Kp = $self->K_previous_nonblank($Kbeg);
my $seqno_p =
defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
$is_tightly_contained = 1;
}
}
$ris_excluded_lp_container->{$parent_seqno} = 1
unless ($is_tightly_contained);
# continue up the tree marking parent containers
while (1) {
$parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
last if ( !defined($parent_seqno) );
last if ( $parent_seqno eq SEQ_ROOT );
$ris_excluded_lp_container->{$parent_seqno} = 1;
}
}
}
$self->[_rstarting_multiline_qw_seqno_by_K_] =
$rstarting_multiline_qw_seqno_by_K;
$self->[_rending_multiline_qw_seqno_by_K_] =
$rending_multiline_qw_seqno_by_K;
$self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
$self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
return;
} ## end sub find_multiline_qw
use constant DEBUG_COLLAPSED_LENGTHS => 0;
# Minimum space reserved for contents of a code block. A value of 40 has given
# reasonable results. With a large line length, say -l=120, this will not
# normally be noticeable but it will prevent making a mess in some edge cases.
use constant MIN_BLOCK_LEN => 40;
my %is_handle_type;
BEGIN {
my @q = qw( w C U G i k => );
@is_handle_type{@q} = (1) x scalar(@q);
my $i = 0;
use constant {
_max_prong_len_ => $i++,
_handle_len_ => $i++,
_seqno_o_ => $i++,
_iline_o_ => $i++,
_K_o_ => $i++,
_K_c_ => $i++,
_interrupted_list_rule_ => $i++,
};
} ## end BEGIN
sub is_fragile_block_type {
my ( $self, $block_type, $seqno ) = @_;
# Given:
# $block_type = the block type of a token, and
# $seqno = its sequence number
# Return:
# true if this block type stays broken after being broken,
# false otherwise
# This sub has been added to isolate a tricky decision needed
# to fix issue b1428.
# The coding here needs to agree with:
# - sub process_line where variable '$rbrace_follower' is set
# - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set,
if ( $is_sort_map_grep_eval{$block_type}
|| $block_type eq 't'
|| $self->[_rshort_nested_]->{$seqno} )
{
return 0;
}
return 1;
} ## end sub is_fragile_block_type
{ ## closure xlp_collapsed_lengths
my $max_prong_len;
my $len;
my $last_nonblank_type;
my @stack;
sub xlp_collapsed_lengths_initialize {
$max_prong_len = 0;
$len = 0;
$last_nonblank_type = 'b';
@stack = ();
push @stack, [
0, # $max_prong_len,
0, # $handle_len,
SEQ_ROOT, # $seqno,
undef, # $iline,
undef, # $KK,
undef, # $K_c,
undef, # $interrupted_list_rule
];
return;
} ## end sub xlp_collapsed_lengths_initialize
sub cumulative_length_to_comma {
my ( $self, $KK, $K_comma, $K_closing ) = @_;
# Given:
# $KK = index of starting token, or blank before start
# $K_comma = index of line-ending comma
# $K_closing = index of the container closing token
# Return:
# $length = cumulative length of the term
my $rLL = $self->[_rLL_];
if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ }
my $length = 0;
if (
$KK < $K_comma
&& $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true
# Ignore if terminal comma, causes instability (b1297,
# b1330)
&& (
$K_closing - $K_comma > 2
|| ( $K_closing - $K_comma == 2
&& $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' )
)
# The comma should be in this container
&& ( $rLL->[$K_comma]->[_LEVEL_] - 1 ==
$rLL->[$K_closing]->[_LEVEL_] )
)
{
# An additional check: if line ends in ), and the ) has vtc then
# skip this estimate. Otherwise, vtc can give oscillating results.
# Fixes b1448. For example, this could be unstable:
# ( $os ne 'win' ? ( -selectcolor => "red" ) : () ),
# | |^--K_comma
# | ^-- K_prev
# ^--- KK
# An alternative, possibly better strategy would be to try to turn
# off -vtc locally, but it turns out to be difficult to locate the
# appropriate closing token when it is not on the same line as its
# opening token.
my $K_prev = $self->K_previous_nonblank($K_comma);
if ( defined($K_prev)
&& $K_prev >= $KK
&& $rLL->[$K_prev]->[_TYPE_SEQUENCE_] )
{
my $token = $rLL->[$K_prev]->[_TOKEN_];
my $type = $rLL->[$K_prev]->[_TYPE_];
if ( $closing_vertical_tightness{$token} && $type ne 'R' ) {
## type 'R' does not normally get broken, so ignore
## skip length calculation
return 0;
}
}
my $starting_len =
$KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
$length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len;
}
return $length;
} ## end sub cumulative_length_to_comma
sub xlp_collapsed_lengths {
my $self = shift;
#----------------------------------------------------------------
# Define the collapsed lengths of containers for -xlp indentation
#----------------------------------------------------------------
# We need an estimate of the minimum required line length starting at
# any opening container for the -xlp style. This is needed to avoid
# using too much indentation space for lower level containers and
# thereby running out of space for outer container tokens due to the
# maximum line length limit.
# The basic idea is that at each node in the tree we imagine that we
# have a fork with a handle and collapsible prongs:
#
# |------------
# |--------
# ------------|-------
# handle |------------
# |--------
# prongs
#
# Each prong has a minimum collapsed length. The collapsed length at a
# node is the maximum of these minimum lengths, plus the handle length.
# Each of the prongs may itself be a tree node.
# This is just a rough calculation to get an approximate starting point
# for indentation. Later routines will be more precise. It is
# important that these estimates be independent of the line breaks of
# the input stream in order to avoid instabilities.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $K_start_multiline_qw;
my $level_start_multiline_qw = 0;
xlp_collapsed_lengths_initialize();
#--------------------------------
# Loop over all lines in the file
#--------------------------------
my $iline = -1;
my $skip_next_line;
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
if ($skip_next_line) {
$skip_next_line = 0;
next;
}
my $line_type = $line_of_tokens->{_line_type};
next if ( $line_type ne 'CODE' );
my $CODE_type = $line_of_tokens->{_code_type};
# Always skip blank lines
next if ( $CODE_type eq 'BL' );
# Note on other line types:
# 'FS' (Format Skipping) lines may contain opening/closing tokens so
# we have to process them to keep the stack correctly sequenced
# 'VB' (Verbatim) lines could be skipped, but testing shows that
# results look better if we include their lengths.
# Also note that we could exclude -xlp formatting of containers with
# 'FS' and 'VB' lines, but in testing that was not really beneficial
# So we process tokens in 'FS' and 'VB' lines like all the rest...
my $rK_range = $line_of_tokens->{_rK_range};
my ( $K_first, $K_last ) = @{$rK_range};
next unless ( defined($K_first) && defined($K_last) );
my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
# Always ignore block comments
next if ( $has_comment && $K_first == $K_last );
# Handle an intermediate line of a multiline qw quote. These may
# require including some -ci or -i spaces. See cases c098/x063.
# Updated to check all lines (not just $K_first==$K_last) to fix
# b1316
my $K_begin_loop = $K_first;
if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
my $KK = $K_first;
my $level = $rLL->[$KK]->[_LEVEL_];
my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
# remember the level of the start
if ( !defined($K_start_multiline_qw) ) {
$K_start_multiline_qw = $K_first;
$level_start_multiline_qw = $level;
my $seqno_qw =
$self->[_rstarting_multiline_qw_seqno_by_K_]
->{$K_start_multiline_qw};
if ( !$seqno_qw ) {
my $Kp = $self->K_previous_nonblank($K_first);
if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
$K_start_multiline_qw = $Kp;
$level_start_multiline_qw =
$rLL->[$K_start_multiline_qw]->[_LEVEL_];
}
else {
# Fix for b1319, b1320
$K_start_multiline_qw = undef;
}
}
}
if ( defined($K_start_multiline_qw) ) {
$len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
$rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
# We may have to add the spaces of one level or ci level
# ... it depends depends on the -xci flag, the -wn flag,
# and if the qw uses a container token as the quote
# delimiter.
# First rule: add ci if there is a $ci_level
if ($ci_level) {
$len += $rOpts_continuation_indentation;
}
# Second rule: otherwise, look for an extra indentation
# level from the start and add one indentation level if
# found.
else {
if ( $level > $level_start_multiline_qw ) {
$len += $rOpts_indent_columns;
}
}
if ( $len > $max_prong_len ) { $max_prong_len = $len }
$last_nonblank_type = 'q';
$K_begin_loop = $K_first + 1;
# We can skip to the next line if more tokens
next if ( $K_begin_loop > $K_last );
}
}
$K_start_multiline_qw = undef;
# Find the terminal token, before any side comment
my $K_terminal = $K_last;
if ($has_comment) {
$K_terminal -= 1;
$K_terminal -= 1
if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
&& $K_terminal > $K_first );
}
# Use length to terminal comma if interrupted list rule applies
if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
my $K_c = $stack[-1]->[_K_c_];
if ( defined($K_c) ) {
#----------------------------------------------------------
# BEGIN patch for issue b1408: If this line ends in an
# opening token, look for the closing token and comma at
# the end of the next line. If so, combine the two lines to
# get the correct sums. This problem seems to require -xlp
# -vtc=2 and blank lines to occur. Use %is_opening_type to
# fix b1431.
#----------------------------------------------------------
if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] }
&& !$has_comment )
{
my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_];
# We are looking for a short broken remnant on the next
# line; something like the third line here (b1408):
# parent =>
# Moose::Util::TypeConstraints::find_type_constraint(
# 'RefXX' ),
# or this
#
# Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
# $story_set_all_chores),
# or this (b1431):
# $issue->{
# 'borrowernumber'}, # borrowernumber
if ( defined($Kc_test)
&& $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
&& $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
{
my $line_of_tokens_next = $rlines->[ $iline + 1 ];
my $rtype_count =
$rtype_count_by_seqno->{$seqno_end};
my ( $K_first_next, $K_terminal_next ) =
@{ $line_of_tokens_next->{_rK_range} };
# backup at a side comment
if ( defined($K_terminal_next)
&& $rLL->[$K_terminal_next]->[_TYPE_] eq '#' )
{
my $Kprev =
$self->K_previous_nonblank($K_terminal_next);
if ( defined($Kprev)
&& $Kprev >= $K_first_next )
{
$K_terminal_next = $Kprev;
}
}
if (
defined($K_terminal_next)
# next line ends with a comma
&& $rLL->[$K_terminal_next]->[_TYPE_] eq ','
# which follows the closing container token
&& (
$K_terminal_next - $Kc_test == 1
|| ( $K_terminal_next - $Kc_test == 2
&& $rLL->[ $K_terminal_next - 1 ]
->[_TYPE_] eq 'b' )
)
# no commas in the container
&& ( !defined($rtype_count)
|| !$rtype_count->{','} )
# for now, restrict this to a container with
# just 1 or two tokens
&& $K_terminal_next - $K_terminal <= 5
)
{
# combine the next line with the current line
$K_terminal = $K_terminal_next;
$skip_next_line = 1;
if (DEBUG_COLLAPSED_LENGTHS) {
print "Combining lines at line $iline\n";
}
}
}
}
#--------------------------
# END patch for issue b1408
#--------------------------
if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) {
my $length =
$self->cumulative_length_to_comma( $K_first,
$K_terminal, $K_c );
# Fix for b1331: at a broken => item, include the
# length of the previous half of the item plus one for
# the missing space
if ( $last_nonblank_type eq '=>' ) {
$length += $len + 1;
}
if ( $length > $max_prong_len ) {
$max_prong_len = $length;
}
}
}
}
#----------------------------------
# Loop over all tokens on this line
#----------------------------------
$self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop,
$K_terminal, $K_last );
# Now take care of any side comment;
if ($has_comment) {
if ($rOpts_ignore_side_comment_lengths) {
$len = 0;
}
else {
# For a side comment when -iscl is not set, measure length from
# the start of the previous nonblank token
my $len0 =
$K_terminal > 0
? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
: 0;
$len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
}
} ## end loop over lines
if (DEBUG_COLLAPSED_LENGTHS) {
print "\nCollapsed lengths--\n";
foreach
my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
{
my $clen = $rcollapsed_length_by_seqno->{$key};
print "$key -> $clen\n";
}
}
return;
} ## end sub xlp_collapsed_lengths
sub xlp_collapse_lengths_inner_loop {
my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_;
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
my $ris_permanently_broken = $self->[_ris_permanently_broken_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $rhas_broken_list = $self->[_rhas_broken_list_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
#----------------------------------
# Loop over tokens on this line ...
#----------------------------------
foreach my $KK ( $K_begin_loop .. $K_terminal ) {
my $type = $rLL->[$KK]->[_TYPE_];
next if ( $type eq 'b' );
#------------------------
# Handle sequenced tokens
#------------------------
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ($seqno) {
my $token = $rLL->[$KK]->[_TOKEN_];
#----------------------------
# Entering a new container...
#----------------------------
if ( $is_opening_token{$token}
&& defined( $K_closing_container->{$seqno} ) )
{
# save current prong length
$stack[-1]->[_max_prong_len_] = $max_prong_len;
$max_prong_len = 0;
# Start new prong one level deeper
my $handle_len = 0;
if ( $rblock_type_of_seqno->{$seqno} ) {
# code blocks do not use -lp indentation, but behave as
# if they had a handle of one indentation length
$handle_len = $rOpts_indent_columns;
}
else {
if ( $is_handle_type{$last_nonblank_type} ) {
$handle_len = $len;
$handle_len += 1
if ( $KK > 0
&& $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
}
}
# Set a flag if the 'Interrupted List Rule' will be applied
# (see sub copy_old_breakpoints).
# - Added check on has_broken_list to fix issue b1298
my $interrupted_list_rule =
$ris_permanently_broken->{$seqno}
&& $ris_list_by_seqno->{$seqno}
&& !$rhas_broken_list->{$seqno}
&& !$rOpts_ignore_old_breakpoints;
# NOTES: Since we are looking at old line numbers we have
# to be very careful not to introduce an instability.
# This following causes instability (b1288-b1296):
# $interrupted_list_rule ||=
# $rOpts_break_at_old_comma_breakpoints;
# - We could turn off the interrupted list rule if there is
# a broken sublist, to follow 'Compound List Rule 1'.
# - We could use the _rhas_broken_list_ flag for this.
# - But it seems safer not to do this, to avoid
# instability, since the broken sublist could be
# temporary. It seems better to let the formatting
# stabilize by itself after one or two iterations.
# - So, not doing this for now
# Turn off the interrupted list rule if -vmll is set and a
# list has '=>' characters. This avoids instabilities due
# to dependence on old line breaks; issue b1325.
if ( $interrupted_list_rule
&& $rOpts_variable_maximum_line_length )
{
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ( $rtype_count && $rtype_count->{'=>'} ) {
$interrupted_list_rule = 0;
}
}
my $K_c = $K_closing_container->{$seqno};
# Add length of any terminal list item if interrupted
# so that the result is the same as if the term is
# in the next line (b1446).
if (
$interrupted_list_rule
&& $KK < $K_terminal
# The line should end in a comma
# NOTE: this currently assumes break after comma.
# As long as the other call to cumulative_length..
# makes the same assumption we should remain stable.
&& $rLL->[$K_terminal]->[_TYPE_] eq ','
)
{
$max_prong_len =
$self->cumulative_length_to_comma( $KK + 1,
$K_terminal, $K_c );
}
push @stack, [
$max_prong_len,
$handle_len,
$seqno,
$iline,
$KK,
$K_c,
$interrupted_list_rule
];
}
#--------------------
# Exiting a container
#--------------------
elsif ( $is_closing_token{$token} && @stack ) {
# The current prong ends - get its handle
my $item = pop @stack;
my $handle_len = $item->[_handle_len_];
my $seqno_o = $item->[_seqno_o_];
my $iline_o = $item->[_iline_o_];
my $K_o = $item->[_K_o_];
my $K_c_expect = $item->[_K_c_];
my $collapsed_len = $max_prong_len;
if ( $seqno_o ne $seqno ) {
# This can happen if input file has brace errors.
# Otherwise it shouldn't happen. Not fatal but -lp
# formatting could get messed up.
if ( DEVEL_MODE && !get_saw_brace_error() ) {
Fault(<<EOM);
sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
EOM
}
}
#------------------------------------------
# Rules to avoid scrunching code blocks ...
#------------------------------------------
# Some test cases:
# c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
my $block_type = $rblock_type_of_seqno->{$seqno};
if ($block_type) {
my $K_c = $KK;
my $block_length = MIN_BLOCK_LEN;
my $is_one_line_block;
my $level = $rLL->[$K_o]->[_LEVEL_];
if ( defined($K_o) && defined($K_c) ) {
# note: fixed 3 May 2022 (removed 'my')
$block_length =
$rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
$rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
$is_one_line_block = $iline == $iline_o;
}
# Code block rule 1: Use the total block length if
# it is less than the minimum.
if ( $block_length < MIN_BLOCK_LEN ) {
$collapsed_len = $block_length;
}
# Code block rule 2: Use the full length of a
# one-line block to avoid breaking it, unless
# extremely long. We do not need to do a precise
# check here, because if it breaks then it will
# stay broken on later iterations.
elsif (
$is_one_line_block
&& $block_length <
$maximum_line_length_at_level[$level]
# But skip this for blocks types which can reform,
# like sort/map/grep/eval blocks, to avoid
# instability (b1345, b1428)
&& $self->is_fragile_block_type( $block_type,
$seqno )
)
{
$collapsed_len = $block_length;
}
# Code block rule 3: Otherwise the length should be
# at least MIN_BLOCK_LEN to avoid scrunching code
# blocks.
elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
$collapsed_len = MIN_BLOCK_LEN;
}
else {
## ok
}
}
# Store the result. Some extra space, '2', allows for
# length of an opening token, inside space, comma, ...
# This constant has been tuned to give good overall
# results.
$collapsed_len += 2;
$rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
# Restart scanning the lower level prong
if (@stack) {
$max_prong_len = $stack[-1]->[_max_prong_len_];
$collapsed_len += $handle_len;
if ( $collapsed_len > $max_prong_len ) {
$max_prong_len = $collapsed_len;
}
}
}
# it is a ternary - no special processing for these yet
else {
}
$len = 0;
$last_nonblank_type = $type;
next;
}
#----------------------------
# Handle non-container tokens
#----------------------------
my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
# Count lengths of things like 'xx => yy' as a single item
if ( $type eq '=>' ) {
$len += $token_length + 1;
# fix $len for -naws, issue b1457
if ( !$rOpts_add_whitespace ) {
if ( defined( $rLL->[ $KK + 1 ] )
&& $rLL->[ $KK + 1 ]->[_TYPE_] ne 'b' )
{
$len -= 1;
}
}
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
elsif ( $last_nonblank_type eq '=>' ) {
$len += $token_length;
if ( $len > $max_prong_len ) { $max_prong_len = $len }
# but only include one => per item
$len = $token_length;
}
# include everything to end of line after a here target
elsif ( $type eq 'h' ) {
$len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
$rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
# for everything else just use the token length
else {
$len = $token_length;
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
$last_nonblank_type = $type;
} ## end loop over tokens on this line
return;
} ## end sub xlp_collapse_lengths_inner_loop
} ## end closure xlp_collapsed_lengths
sub is_excluded_lp {
# Decide if this container is excluded by user request:
# returns true if this token is excluded (i.e., may not use -lp)
# returns false otherwise
# The control hash can either describe:
# what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
# what to include: $line_up_parentheses_control_is_lxpl = 0
# Input parameter:
# $KK = index of the container opening token
my ( $self, $KK ) = @_;
my $rLL = $self->[_rLL_];
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $rflags = $line_up_parentheses_control_hash{$token};
#-----------------------------------------------
# TEST #1: check match to listed container types
#-----------------------------------------------
if ( !defined($rflags) ) {
# There is no entry for this container, so we are done
return !$line_up_parentheses_control_is_lxpl;
}
my ( $flag1, $flag2 ) = @{$rflags};
#-----------------------------------------------------------
# TEST #2: check match to flag1, the preceding nonblank word
#-----------------------------------------------------------
my $match_flag1 = !defined($flag1) || $flag1 eq '*';
if ( !$match_flag1 ) {
# Find the previous token
my ( $is_f, $is_k, $is_w );
my $Kp = $self->K_previous_nonblank($KK);
if ( defined($Kp) ) {
my $type_p = $rLL->[$Kp]->[_TYPE_];
my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
# keyword?
$is_k = $type_p eq 'k';
# function call?
$is_f = $self->[_ris_function_call_paren_]->{$seqno};
# either keyword or function call?
$is_w = $is_k || $is_f;
}
# Check for match based on flag1 and the previous token:
if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
else {
## no match
}
}
# See if we can exclude this based on the flag1 test...
if ($line_up_parentheses_control_is_lxpl) {
return 1 if ($match_flag1);
}
else {
return 1 if ( !$match_flag1 );
}
#-------------------------------------------------------------
# TEST #3: exclusion based on flag2 and the container contents
#-------------------------------------------------------------
# Note that this is an exclusion test for both -lpxl or -lpil input methods
# The options are:
# 0 or blank: ignore container contents
# 1 exclude non-lists or lists with sublists
# 2 same as 1 but also exclude lists with code blocks
my $match_flag2;
if ($flag2) {
my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
my $has_list = $self->[_rhas_list_]->{$seqno};
my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
if ( !$is_list
|| $has_list
|| $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
{
$match_flag2 = 1;
}
}
return $match_flag2;
} ## end sub is_excluded_lp
sub set_excluded_lp_containers {
my ($self) = @_;
return unless ($rOpts_line_up_parentheses);
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $K_opening_container = $self->[_K_opening_container_];
my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
foreach my $seqno ( keys %{$K_opening_container} ) {
# code blocks are always excluded by the -lp coding so we can skip them
next if ( $rblock_type_of_seqno->{$seqno} );
my $KK = $K_opening_container->{$seqno};
next unless defined($KK);
# see if a user exclusion rule turns off -lp for this container
if ( $self->is_excluded_lp($KK) ) {
$ris_excluded_lp_container->{$seqno} = 1;
}
}
return;
} ## end sub set_excluded_lp_containers
######################################
# CODE SECTION 6: Process line-by-line
######################################
sub process_all_lines {
#----------------------------------------------------------
# Main loop to format all lines of a file according to type
#----------------------------------------------------------
my $self = shift;
my $rlines = $self->[_rlines_];
my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
my $file_writer_object = $self->[_file_writer_object_];
my $logger_object = $self->[_logger_object_];
my $vertical_aligner_object = $self->[_vertical_aligner_object_];
my $save_logfile = $self->[_save_logfile_];
# Flag to prevent blank lines when POD occurs in a format skipping sect.
my $in_format_skipping_section;
# set locations for blanks around long runs of keywords
my $rwant_blank_line_after = $self->keyword_group_scan();
my $line_type = EMPTY_STRING;
my $i_last_POD_END = -10;
my $i = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
# insert blank lines requested for keyword sequences
if ( defined( $rwant_blank_line_after->{$i} )
&& $rwant_blank_line_after->{$i} == 1 )
{
$self->want_blank_line();
}
$i++;
my $last_line_type = $line_type;
$line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# CODE - line of perl code (including comments)
# POD_START - line starting pod, such as '=head'
# POD - pod documentation text
# POD_END - last line of pod section, '=cut'
# HERE - text of here-document
# HERE_END - last line of here-doc (target word)
# FORMAT - format section
# FORMAT_END - last line of format section, '.'
# SKIP - code skipping section
# SKIP_END - last line of code skipping section, '#>>V'
# DATA_START - __DATA__ line
# DATA - unidentified text following __DATA__
# END_START - __END__ line
# END - unidentified text following __END__
# ERROR - we are in big trouble, probably not a perl script
# put a blank line after an =cut which comes before __END__ and __DATA__
# (required by podchecker)
if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
$i_last_POD_END = $i;
$file_writer_object->reset_consecutive_blank_lines();
if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
$self->want_blank_line();
}
}
# handle line of code..
if ( $line_type eq 'CODE' ) {
my $CODE_type = $line_of_tokens->{_code_type};
$in_format_skipping_section = $CODE_type eq 'FS';
# Handle blank lines
if ( $CODE_type eq 'BL' ) {
# Keep this blank? Start with the flag -kbl=n, where
# n=0 ignore all old blank lines
# n=1 stable: keep old blanks, but limited by -mbl=n
# n=2 keep all old blank lines, regardless of -mbl=n
# If n=0 we delete all old blank lines and let blank line
# rules generate any needed blank lines.
my $kgb_keep = $rOpts_keep_old_blank_lines;
# Then delete lines requested by the keyword-group logic if
# allowed
if ( $kgb_keep == 1
&& defined( $rwant_blank_line_after->{$i} )
&& $rwant_blank_line_after->{$i} == 2 )
{
$kgb_keep = 0;
}
# But always keep a blank line following an =cut
if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
$kgb_keep = 1;
}
if ($kgb_keep) {
$self->flush($CODE_type);
$file_writer_object->write_blank_code_line(
$rOpts_keep_old_blank_lines == 2 );
$self->[_last_line_leading_type_] = 'b';
}
next;
}
else {
# Let logger see all non-blank lines of code. This is a slow
# operation so we avoid it if it is not going to be saved.
if ( $save_logfile && $logger_object ) {
$logger_object->black_box( $line_of_tokens,
$vertical_aligner_object->get_output_line_number );
}
}
# Handle Format Skipping (FS) and Verbatim (VB) Lines
if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
$self->write_unindented_line($input_line);
$file_writer_object->reset_consecutive_blank_lines();
next;
}
# Handle all other lines of code
$self->process_line_of_CODE($line_of_tokens);
}
# handle line of non-code..
else {
# set special flags
my $skip_line = 0;
if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
# Pod docs should have a preceding blank line. But stay
# out of __END__ and __DATA__ sections, because
# the user may be using this section for any purpose whatsoever
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'trim-pod'} ) {
chomp $input_line;
$input_line =~ s/\s+$//;
$input_line .= "\n";
}
if ( !$skip_line
&& !$in_format_skipping_section
&& $line_type eq 'POD_START'
&& !$self->[_saw_END_or_DATA_] )
{
$self->want_blank_line();
}
}
# leave the blank counters in a predictable state
# after __END__ or __DATA__
elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
$file_writer_object->reset_consecutive_blank_lines();
$self->[_saw_END_or_DATA_] = 1;
}
# Patch to avoid losing blank lines after a code-skipping block;
# fixes case c047.
elsif ( $line_type eq 'SKIP_END' ) {
$file_writer_object->reset_consecutive_blank_lines();
}
else {
## some other line type
}
# write unindented non-code line
if ( !$skip_line ) {
$self->write_unindented_line($input_line);
}
}
}
return;
} ## end sub process_all_lines
{ ## closure keyword_group_scan
# this is the return var
my $rhash_of_desires;
# user option variables for -kgb
my (
$rOpts_kgb_after,
$rOpts_kgb_before,
$rOpts_kgb_delete,
$rOpts_kgb_inside,
$rOpts_kgb_size_max,
$rOpts_kgb_size_min,
);
# group variables, initialized by kgb_initialize_group_vars
my ( $ibeg, $iend, $count, $level_beg, $K_closing );
my ( @iblanks, @group, @subgroup );
# line variables, updated by sub keyword_group_scan
my ( $line_type, $CODE_type, $K_first, $K_last );
my $number_of_groups_seen;
#------------------------
# -kgb helper subroutines
#------------------------
sub kgb_initialize_options {
# check and initialize user options for -kgb
# return error flag:
# true for some input error, do not continue
# false if ok
# Local copies of the various control parameters
$rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
$rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
$rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
$rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
# A range of sizes can be input with decimal notation like 'min.max'
# with any number of dots between the two numbers. Examples:
# string => min max matches
# 1.1 1 1 exactly 1
# 1.3 1 3 1,2, or 3
# 1..3 1 3 1,2, or 3
# 5 5 - 5 or more
# 6. 6 - 6 or more
# .2 - 2 up to 2
# 1.0 1 0 nothing
my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/,
$rOpts_kgb_size;
if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/
|| $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ )
{
Warn(<<EOM);
Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max';
ignoring all -kgb flags
EOM
# Turn this option off so that this message does not keep repeating
# during iterations and other files.
$rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
return $rhash_of_desires;
}
$rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min);
if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min )
{
return $rhash_of_desires;
}
# check codes for $rOpts_kgb_before and
# $rOpts_kgb_after:
# 0 = never (delete if exist)
# 1 = stable (keep unchanged)
# 2 = always (insert if missing)
my $ok = $rOpts_kgb_size_min > 0
&& ( $rOpts_kgb_before != 1
|| $rOpts_kgb_after != 1
|| $rOpts_kgb_inside
|| $rOpts_kgb_delete );
return $rhash_of_desires if ( !$ok );
return;
} ## end sub kgb_initialize_options
sub kgb_initialize_group_vars {
# Definitions:
# $ibeg = first line index of this entire group
# $iend = last line index of this entire group
# $count = total number of keywords seen in this entire group
# $level_beg = indentation level of this group
# @group = [ $i, $token, $count ] =list of all keywords & blanks
# @subgroup = $j, index of group where token changes
# @iblanks = line indexes of blank lines in input stream in this group
# where i=starting line index
# token (the keyword)
# count = number of this token in this subgroup
# j = index in group where token changes
$ibeg = -1;
$iend = undef;
$level_beg = -1;
$K_closing = undef;
$count = 0;
@group = ();
@subgroup = ();
@iblanks = ();
return;
} ## end sub kgb_initialize_group_vars
sub kgb_initialize_line_vars {
$CODE_type = EMPTY_STRING;
$K_first = undef;
$K_last = undef;
$line_type = EMPTY_STRING;
return;
} ## end sub kgb_initialize_line_vars
sub kgb_initialize {
# initialize all closure variables for -kgb
# return:
# true to cause immediate exit (something is wrong)
# false to continue ... all is okay
# This is the return variable:
$rhash_of_desires = {};
# initialize and check user options;
my $quit = kgb_initialize_options();
if ($quit) { return $quit }
# initialize variables for the current group and subgroups:
kgb_initialize_group_vars();
# initialize variables for the most recently seen line:
kgb_initialize_line_vars();
$number_of_groups_seen = 0;
# all okay
return;
} ## end sub kgb_initialize
sub kgb_insert_blank_after {
my ($i) = @_;
$rhash_of_desires->{$i} = 1;
my $ip = $i + 1;
if ( defined( $rhash_of_desires->{$ip} )
&& $rhash_of_desires->{$ip} == 2 )
{
$rhash_of_desires->{$ip} = 0;
}
return;
} ## end sub kgb_insert_blank_after
sub kgb_split_into_sub_groups {
# place blanks around long sub-groups of keywords
# ...if requested
return unless ($rOpts_kgb_inside);
# loop over sub-groups, index k
push @subgroup, scalar @group;
my $kbeg = 1;
my $kend = @subgroup - 1;
foreach my $k ( $kbeg .. $kend ) {
# index j runs through all keywords found
my $j_b = $subgroup[ $k - 1 ];
my $j_e = $subgroup[$k] - 1;
# index i is the actual line number of a keyword
my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
my $num = $count_e - $count_b + 1;
# This subgroup runs from line $ib to line $ie-1, but may contain
# blank lines
if ( $num >= $rOpts_kgb_size_min ) {
# if there are blank lines, we require that at least $num lines
# be non-blank up to the boundary with the next subgroup.
my $nog_b = my $nog_e = 1;
if ( @iblanks && !$rOpts_kgb_delete ) {
my $j_bb = $j_b + $num - 1;
my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
$nog_b = $count_bb - $count_b + 1 == $num;
my $j_ee = $j_e - ( $num - 1 );
my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
$nog_e = $count_e - $count_ee + 1 == $num;
}
if ( $nog_b && $k > $kbeg ) {
kgb_insert_blank_after( $i_b - 1 );
}
if ( $nog_e && $k < $kend ) {
my ( $i_ep, $tok_ep, $count_ep ) =
@{ $group[ $j_e + 1 ] };
kgb_insert_blank_after( $i_ep - 1 );
}
}
}
return;
} ## end sub kgb_split_into_sub_groups
sub kgb_delete_if_blank {
my ( $self, $i ) = @_;
# delete line $i if it is blank
my $rlines = $self->[_rlines_];
return if ( $i < 0 || $i >= @{$rlines} );
return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
my $code_type = $rlines->[$i]->{_code_type};
if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
return;
} ## end sub kgb_delete_if_blank
sub kgb_delete_inner_blank_lines {
# always remove unwanted trailing blank lines from our list
return unless (@iblanks);
while ( my $ibl = pop(@iblanks) ) {
if ( $ibl < $iend ) { push @iblanks, $ibl; last }
$iend = $ibl;
}
# now mark mark interior blank lines for deletion if requested
return unless ($rOpts_kgb_delete);
while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
return;
} ## end sub kgb_delete_inner_blank_lines
sub kgb_end_group {
# end a group of keywords
my ( $self, $bad_ending ) = @_;
if ( defined($ibeg) && $ibeg >= 0 ) {
# then handle sufficiently large groups
if ( $count >= $rOpts_kgb_size_min ) {
$number_of_groups_seen++;
# do any blank deletions regardless of the count
kgb_delete_inner_blank_lines();
my $rlines = $self->[_rlines_];
if ( $ibeg > 0 ) {
my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
# patch for hash bang line which is not currently marked as
# a comment; mark it as a comment
if ( $ibeg == 1 && !$code_type ) {
my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
$code_type = 'BC'
if ( $line_text && $line_text =~ /^#/ );
}
# Do not insert a blank after a comment
# (this could be subject to a flag in the future)
if ( $code_type !~ /(?:BC|SBC|SBCX)/ ) {
if ( $rOpts_kgb_before == INSERT ) {
kgb_insert_blank_after( $ibeg - 1 );
}
elsif ( $rOpts_kgb_before == DELETE ) {
$self->kgb_delete_if_blank( $ibeg - 1 );
}
else {
## == STABLE
}
}
}
# We will only put blanks before code lines. We could loosen
# this rule a little, but we have to be very careful because
# for example we certainly don't want to drop a blank line
# after a line like this:
# my $var = <<EOM;
if ( $line_type eq 'CODE' && defined($K_first) ) {
# - Do not put a blank before a line of different level
# - Do not put a blank line if we ended the search badly
# - Do not put a blank at the end of the file
# - Do not put a blank line before a hanging side comment
my $rLL = $self->[_rLL_];
my $level = $rLL->[$K_first]->[_LEVEL_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
if ( $level == $level_beg
&& $ci_level == 0
&& !$bad_ending
&& $iend < @{$rlines}
&& $CODE_type ne 'HSC' )
{
if ( $rOpts_kgb_after == INSERT ) {
kgb_insert_blank_after($iend);
}
elsif ( $rOpts_kgb_after == DELETE ) {
$self->kgb_delete_if_blank( $iend + 1 );
}
else {
## == STABLE
}
}
}
}
kgb_split_into_sub_groups();
}
# reset for another group
kgb_initialize_group_vars();
return;
} ## end sub kgb_end_group
sub kgb_find_container_end {
# If the keyword line is continued onto subsequent lines, find the
# closing token '$K_closing' so that we can easily skip past the
# contents of the container.
# We only set this value if we find a simple list, meaning
# -contents only one level deep
# -not welded
my ($self) = @_;
# First check: skip if next line is not one deeper
my $Knext_nonblank = $self->K_next_nonblank($K_last);
return if ( !defined($Knext_nonblank) );
my $rLL = $self->[_rLL_];
my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
return if ( $level_next != $level_beg + 1 );
# Find the parent container of the first token on the next line
my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
return unless ( defined($parent_seqno) );
# Must not be a weld (can be unstable)
return
if ( $total_weld_count
&& $self->is_welded_at_seqno($parent_seqno) );
# Opening container must exist and be on this line
my $Ko = $self->[_K_opening_container_]->{$parent_seqno};
return if ( !defined($Ko) || $Ko <= $K_first || $Ko > $K_last );
# Verify that the closing container exists and is on a later line
my $Kc = $self->[_K_closing_container_]->{$parent_seqno};
return if ( !defined($Kc) || $Kc <= $K_last );
# That's it
$K_closing = $Kc;
return;
} ## end sub kgb_find_container_end
sub kgb_add_to_group {
my ( $self, $i, $token, $level ) = @_;
# End the previous group if we have reached the maximum
# group size
if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) {
$self->kgb_end_group();
}
if ( @group == 0 ) {
$ibeg = $i;
$level_beg = $level;
$count = 0;
}
$count++;
$iend = $i;
# New sub-group?
if ( !@group || $token ne $group[-1]->[1] ) {
push @subgroup, scalar(@group);
}
push @group, [ $i, $token, $count ];
# remember if this line ends in an open container
$self->kgb_find_container_end();
return;
} ## end sub kgb_add_to_group
#---------------------
# -kgb main subroutine
#---------------------
sub keyword_group_scan {
my $self = shift;
# Called once per file to process --keyword-group-blanks-* parameters.
# Task:
# Manipulate blank lines around keyword groups (kgb* flags)
# Scan all lines looking for runs of consecutive lines beginning with
# selected keywords. Example keywords are 'my', 'our', 'local', ... but
# they may be anything. We will set flags requesting that blanks be
# inserted around and within them according to input parameters. Note
# that we are scanning the lines as they came in in the input stream, so
# they are not necessarily well formatted.
# Returns:
# The output of this sub is a return hash ref whose keys are the indexes
# of lines after which we desire a blank line. For line index $i:
# $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
# $rhash_of_desires->{$i} = 2 means we want blank line $i removed
# Nothing to do if no blanks can be output. This test added to fix
# case b760.
if ( !$rOpts_maximum_consecutive_blank_lines ) {
return $rhash_of_desires;
}
#---------------
# initialization
#---------------
my $quit = kgb_initialize();
if ($quit) { return $rhash_of_desires }
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
$self->kgb_end_group();
my $i = -1;
my $Opt_repeat_count =
$rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
#----------------------------------
# loop over all lines of the source
#----------------------------------
foreach my $line_of_tokens ( @{$rlines} ) {
$i++;
last
if ( $Opt_repeat_count > 0
&& $number_of_groups_seen >= $Opt_repeat_count );
kgb_initialize_line_vars();
$line_type = $line_of_tokens->{_line_type};
# always end a group at non-CODE
if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next }
$CODE_type = $line_of_tokens->{_code_type};
# end any group at a format skipping line
if ( $CODE_type && $CODE_type eq 'FS' ) {
$self->kgb_end_group();
next;
}
# continue in a verbatim (VB) type; it may be quoted text
if ( $CODE_type eq 'VB' ) {
if ( $ibeg >= 0 ) { $iend = $i; }
next;
}
# and continue in blank (BL) types
if ( $CODE_type eq 'BL' ) {
if ( $ibeg >= 0 ) {
$iend = $i;
push @{iblanks}, $i;
# propagate current subgroup token
my $tok = $group[-1]->[1];
push @group, [ $i, $tok, $count ];
}
next;
}
# examine the first token of this line
my $rK_range = $line_of_tokens->{_rK_range};
( $K_first, $K_last ) = @{$rK_range};
if ( !defined($K_first) ) {
# Somewhat unexpected blank line..
# $rK_range is normally defined for line type CODE, but this can
# happen for example if the input line was a single semicolon
# which is being deleted. In that case there was code in the
# input file but it is not being retained. So we can silently
# return.
return $rhash_of_desires;
}
my $level = $rLL->[$K_first]->[_LEVEL_];
my $type = $rLL->[$K_first]->[_TYPE_];
my $token = $rLL->[$K_first]->[_TOKEN_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
# End a group 'badly' at an unexpected level. This will prevent
# blank lines being incorrectly placed after the end of the group.
# We are looking for any deviation from two acceptable patterns:
# PATTERN 1: a simple list; secondary lines are at level+1
# PATTERN 2: a long statement; all secondary lines same level
# This was added as a fix for case b1177, in which a complex
# structure got incorrectly inserted blank lines.
if ( $ibeg >= 0 ) {
# Check for deviation from PATTERN 1, simple list:
if ( defined($K_closing) && $K_first < $K_closing ) {
$self->kgb_end_group(1) if ( $level != $level_beg + 1 );
}
# Check for deviation from PATTERN 2, single statement:
elsif ( $level != $level_beg ) { $self->kgb_end_group(1) }
else {
## no deviation
}
}
# Do not look for keywords in lists ( keyword 'my' can occur in
# lists, see case b760); fixed for c048.
if ( $self->is_list_by_K($K_first) ) {
if ( $ibeg >= 0 ) { $iend = $i }
next;
}
# see if this is a code type we seek (i.e. comment)
if ( $CODE_type
&& $keyword_group_list_comment_pattern
&& $CODE_type =~ /$keyword_group_list_comment_pattern/ )
{
my $tok = $CODE_type;
# Continuing a group
if ( $ibeg >= 0 && $level == $level_beg ) {
$self->kgb_add_to_group( $i, $tok, $level );
}
# Start new group
else {
# first end old group if any; we might be starting new
# keywords at different level
if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
$self->kgb_add_to_group( $i, $tok, $level );
}
next;
}
# See if it is a keyword we seek, but never start a group in a
# continuation line; the code may be badly formatted.
if ( $ci_level == 0
&& $type eq 'k'
&& $token =~ /$keyword_group_list_pattern/ )
{
# Continuing a keyword group
if ( $ibeg >= 0 && $level == $level_beg ) {
$self->kgb_add_to_group( $i, $token, $level );
}
# Start new keyword group
else {
# first end old group if any; we might be starting new
# keywords at different level
if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
$self->kgb_add_to_group( $i, $token, $level );
}
next;
}
# This is not one of our keywords, but we are in a keyword group
# so see if we should continue or quit
elsif ( $ibeg >= 0 ) {
# - bail out on a large level change; we may have walked into a
# data structure or anonymous sub code.
if ( $level > $level_beg + 1 || $level < $level_beg ) {
$self->kgb_end_group(1);
next;
}
# - keep going on a continuation line of the same level, since
# it is probably a continuation of our previous keyword,
# - and keep going past hanging side comments because we never
# want to interrupt them.
if ( ( ( $level == $level_beg ) && $ci_level > 0 )
|| $CODE_type eq 'HSC' )
{
$iend = $i;
next;
}
# - continue if if we are within in a container which started
# with the line of the previous keyword.
if ( defined($K_closing) && $K_first <= $K_closing ) {
# continue if entire line is within container
if ( $K_last <= $K_closing ) { $iend = $i; next }
# continue at ); or }; or ];
my $KK = $K_closing + 1;
if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
if ( $KK < $K_last ) {
if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' )
{
$self->kgb_end_group(1);
next;
}
}
$iend = $i;
next;
}
$self->kgb_end_group(1);
next;
}
# - end the group if none of the above
$self->kgb_end_group();
next;
}
# not in a keyword group; continue
else { next }
} ## end of loop over all lines
$self->kgb_end_group();
return $rhash_of_desires;
} ## end sub keyword_group_scan
} ## end closure keyword_group_scan
#######################################
# CODE SECTION 7: Process lines of code
#######################################
{ ## begin closure process_line_of_CODE
# The routines in this closure receive lines of code and combine them into
# 'batches' and send them along. A 'batch' is the unit of code which can be
# processed further as a unit. It has the property that it is the largest
# amount of code into which which perltidy is free to place one or more
# line breaks within it without violating any constraints.
# When a new batch is formed it is sent to sub 'grind_batch_of_code'.
# flags needed by the store routine
my $line_of_tokens;
my $no_internal_newlines;
my $CODE_type;
my $current_line_starts_in_quote;
# range of K of tokens for the current line
my ( $K_first, $K_last );
my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
$rblock_type_of_seqno, $ri_starting_one_line_block );
# past stored nonblank tokens and flags
my (
$K_last_nonblank_code, $K_dangling_elsif,
$is_static_block_comment, $last_CODE_type,
$last_line_had_side_comment, $next_parent_seqno,
$next_slevel,
);
# Called once at the start of a new file
sub initialize_process_line_of_CODE {
$K_last_nonblank_code = undef;
$K_dangling_elsif = 0;
$is_static_block_comment = 0;
$last_line_had_side_comment = 0;
$next_parent_seqno = SEQ_ROOT;
$next_slevel = undef;
return;
} ## end sub initialize_process_line_of_CODE
# Batch variables: these describe the current batch of code being formed
# and sent down the pipeline. They are initialized in the next
# sub.
my (
$rbrace_follower, $index_start_one_line_block,
$starting_in_quote, $ending_in_quote,
);
# Called before the start of each new batch
sub initialize_batch_variables {
# Initialize array values for a new batch. Any changes here must be
# carefully coordinated with sub store_token_to_go.
$max_index_to_go = UNDEFINED_INDEX;
$summed_lengths_to_go[0] = 0;
$nesting_depth_to_go[0] = 0;
$ri_starting_one_line_block = [];
# Redefine some sparse arrays.
# It is more efficient to redefine these sparse arrays and rely on
# undef's instead of initializing to 0's. Testing showed that using
# @array=() is more efficient than $#array=-1
@old_breakpoint_to_go = ();
@forced_breakpoint_to_go = ();
@block_type_to_go = ();
@mate_index_to_go = ();
@type_sequence_to_go = ();
# NOTE: @nobreak_to_go is sparse and could be treated this way, but
# testing showed that there would be very little efficiency gain
# because an 'if' test must be added in store_token_to_go.
# The initialization code for the remaining batch arrays is as follows
# and can be activated for testing. But profiling shows that it is
# time-consuming to re-initialize the batch arrays and is not necessary
# because the maximum valid token, $max_index_to_go, is carefully
# controlled. This means however that it is not possible to do any
# type of filter or map operation directly on these arrays. And it is
# not possible to use negative indexes. As a precaution against program
# changes which might do this, sub pad_array_to_go adds some undefs at
# the end of the current batch of data.
## 0 && do { #<<<
## @nobreak_to_go = ();
## @token_lengths_to_go = ();
## @levels_to_go = ();
## @ci_levels_to_go = ();
## @tokens_to_go = ();
## @K_to_go = ();
## @types_to_go = ();
## @leading_spaces_to_go = ();
## @reduced_spaces_to_go = ();
## @inext_to_go = ();
## @parent_seqno_to_go = ();
## };
$rbrace_follower = undef;
$ending_in_quote = 0;
$index_start_one_line_block = undef;
# initialize forced breakpoint vars associated with each output batch
$forced_breakpoint_count = 0;
$index_max_forced_break = UNDEFINED_INDEX;
$forced_breakpoint_undo_count = 0;
return;
} ## end sub initialize_batch_variables
sub leading_spaces_to_go {
# return the number of indentation spaces for a token in the output
# stream
my ($ii) = @_;
return 0 if ( $ii < 0 );
my $indentation = $leading_spaces_to_go[$ii];
return ref($indentation) ? $indentation->get_spaces() : $indentation;
} ## end sub leading_spaces_to_go
sub create_one_line_block {
# set index starting next one-line block
# call with no args to delete the current one-line block
($index_start_one_line_block) = @_;
return;
} ## end sub create_one_line_block
# Routine to place the current token into the output stream.
# Called once per output token.
use constant DEBUG_STORE => 0;
sub store_token_to_go {
my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
#-------------------------------------------------------
# Token storage utility for sub process_line_of_CODE.
# Add one token to the next batch of '_to_go' variables.
#-------------------------------------------------------
# Input parameters:
# $Ktoken_vars = the index K in the global token array
# $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
# unless they are temporarily being overridden
#------------------------------------------------------------------
# NOTE: called once per token so coding efficiency is critical here.
# All changes need to be benchmarked with Devel::NYTProf.
#------------------------------------------------------------------
my (
$type,
$token,
$ci_level,
$level,
$seqno,
$length,
) = @{$rtoken_vars}[
_TYPE_,
_TOKEN_,
_CI_LEVEL_,
_LEVEL_,
_TYPE_SEQUENCE_,
_TOKEN_LENGTH_,
];
# Check for emergency flush...
# The K indexes in the batch must always be a continuous sequence of
# the global token array. The batch process programming assumes this.
# If storing this token would cause this relation to fail we must dump
# the current batch before storing the new token. It is extremely rare
# for this to happen. One known example is the following two-line
# snippet when run with parameters
# --noadd-newlines --space-terminal-semicolon:
# if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
# $yy=1;
if ( $max_index_to_go >= 0 ) {
if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
$self->flush_batch_of_CODE();
}
# Do not output consecutive blank tokens ... this should not
# happen, but it is worth checking. Later code can then make the
# simplifying assumption that blank tokens are not consecutive.
elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
if (DEVEL_MODE) {
# if this happens, it is may be that consecutive blanks
# were inserted into the token stream in 'respace_tokens'
my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
Fault("consecutive blanks near line $lno; please fix");
}
return;
}
else {
## all ok
}
}
# Do not start a batch with a blank token.
# Fixes cases b149 b888 b984 b985 b986 b987
else {
if ( $type eq 'b' ) { return }
}
# Update counter and do initializations if first token of new batch
if ( !++$max_index_to_go ) {
# Reset flag '$starting_in_quote' for a new batch. It must be set
# to the value of '$in_continued_quote', but here for efficiency we
# set it to zero, which is its normal value. Then in coding below
# we will change it if we find we are actually in a continued quote.
$starting_in_quote = 0;
# Update the next parent sequence number for each new batch.
#----------------------------------------
# Begin coding from sub parent_seqno_by_K
#----------------------------------------
# The following is equivalent to this call but much faster:
# $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
$next_parent_seqno = SEQ_ROOT;
if ($seqno) {
$next_parent_seqno = $rparent_of_seqno->{$seqno};
}
else {
my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
if ( defined($Kt) ) {
my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
my $type_t = $rLL->[$Kt]->[_TYPE_];
# if next container token is closing, it is the parent seqno
if ( $is_closing_type{$type_t} ) {
$next_parent_seqno = $type_sequence_t;
}
# otherwise we want its parent container
else {
$next_parent_seqno =
$rparent_of_seqno->{$type_sequence_t};
}
}
}
$next_parent_seqno = SEQ_ROOT
if ( !defined($next_parent_seqno) );
#--------------------------------------
# End coding from sub parent_seqno_by_K
#--------------------------------------
$next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
}
# Clip levels to zero if there are level errors in the file.
# We had to wait until now for reasons explained in sub 'write_line'.
if ( $level < 0 ) { $level = 0 }
# Safety check that length is defined. This is slow and should not be
# needed now, so just do it in DEVEL_MODE to check programming changes.
# Formerly needed for --indent-only, in which the entire set of tokens
# is normally turned into type 'q'. Lengths are now defined in sub
# 'respace_tokens' so this check is no longer needed.
if ( DEVEL_MODE && !defined($length) ) {
my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
$length = length($token);
Fault(<<EOM);
undefined length near line $lno; num chars=$length, token='$token'
EOM
}
#----------------------------
# add this token to the batch
#----------------------------
$K_to_go[$max_index_to_go] = $Ktoken_vars;
$types_to_go[$max_index_to_go] = $type;
$tokens_to_go[$max_index_to_go] = $token;
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$levels_to_go[$max_index_to_go] = $level;
$nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
$token_lengths_to_go[$max_index_to_go] = $length;
# Skip point initialization for these sparse arrays - undef's okay;
# See also related code in sub initialize_batch_variables.
## $old_breakpoint_to_go[$max_index_to_go] = 0;
## $forced_breakpoint_to_go[$max_index_to_go] = 0;
## $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
## $type_sequence_to_go[$max_index_to_go] = $seqno;
# NOTE: nobreak_to_go can be treated as a sparse array, but testing
# showed that there is almost no efficiency gain because an if test
# would need to be added.
# We keep a running sum of token lengths from the start of this batch:
# summed_lengths_to_go[$i] = total length to just before token $i
# summed_lengths_to_go[$i+1] = total length to just after token $i
$summed_lengths_to_go[ $max_index_to_go + 1 ] =
$summed_lengths_to_go[$max_index_to_go] + $length;
# Initialize some sequence-dependent variables to their normal values
$parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
$nesting_depth_to_go[$max_index_to_go] = $next_slevel;
# Then fix them at container tokens:
if ($seqno) {
$type_sequence_to_go[$max_index_to_go] = $seqno;
$block_type_to_go[$max_index_to_go] =
$rblock_type_of_seqno->{$seqno};
if ( $is_opening_token{$token} ) {
my $slevel = $rdepth_of_opening_seqno->[$seqno];
$nesting_depth_to_go[$max_index_to_go] = $slevel;
$next_slevel = $slevel + 1;
$next_parent_seqno = $seqno;
}
elsif ( $is_closing_token{$token} ) {
$next_slevel = $rdepth_of_opening_seqno->[$seqno];
my $slevel = $next_slevel + 1;
$nesting_depth_to_go[$max_index_to_go] = $slevel;
my $parent_seqno = $rparent_of_seqno->{$seqno};
$parent_seqno = SEQ_ROOT unless defined($parent_seqno);
$parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
$next_parent_seqno = $parent_seqno;
}
else {
# ternary token: nothing to do
}
}
# Define the indentation that this token will have in two cases:
# Without CI = reduced_spaces_to_go
# With CI = leading_spaces_to_go
$leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces_to_go[$max_index_to_go] =
$rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
if ($ci_level) {
$leading_spaces_to_go[$max_index_to_go] +=
$rOpts_continuation_indentation;
}
# Correct these values if we are starting in a continued quote
if ( $current_line_starts_in_quote
&& $Ktoken_vars == $K_first )
{
# in a continued quote - correct value set above if first token
if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
$leading_spaces_to_go[$max_index_to_go] = 0;
$reduced_spaces_to_go[$max_index_to_go] = 0;
}
DEBUG_STORE && do {
my ( $a, $b, $c ) = caller();
print {*STDOUT}
"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
};
return;
} ## end sub store_token_to_go
sub flush_batch_of_CODE {
# Finish and process the current batch.
# This must be the only call to grind_batch_of_CODE()
my ($self) = @_;
# If a batch has been started ...
if ( $max_index_to_go >= 0 ) {
# Create an array to hold variables for this batch
my $this_batch = [];
$this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
$this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
if ( $CODE_type || $last_CODE_type ) {
$this_batch->[_batch_CODE_type_] =
$K_to_go[$max_index_to_go] >= $K_first
? $CODE_type
: $last_CODE_type;
}
$last_line_had_side_comment =
( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
# The flag $is_static_block_comment applies to the line which just
# arrived. So it only applies if we are outputting that line.
if ( $is_static_block_comment && !$last_line_had_side_comment ) {
$this_batch->[_is_static_block_comment_] =
$K_to_go[0] == $K_first;
}
$this_batch->[_ri_starting_one_line_block_] =
$ri_starting_one_line_block;
$self->[_this_batch_] = $this_batch;
#-------------------
# process this batch
#-------------------
$self->grind_batch_of_CODE();
# Done .. this batch is history
$self->[_this_batch_] = undef;
initialize_batch_variables();
}
return;
} ## end sub flush_batch_of_CODE
sub end_batch {
# End the current batch, EXCEPT for a few special cases
my ($self) = @_;
if ( $max_index_to_go < 0 ) {
# nothing to do .. this is harmless but wastes time.
if (DEVEL_MODE) {
Fault("sub end_batch called with nothing to do; please fix\n");
}
return;
}
# Exceptions when a line does not end with a comment... (fixes c058)
if ( $types_to_go[$max_index_to_go] ne '#' ) {
# Exception 1: Do not end line in a weld
return
if ( $total_weld_count
&& $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
# Exception 2: just set a tentative breakpoint if we might be in a
# one-line block
if ( defined($index_start_one_line_block) ) {
$self->set_forced_breakpoint($max_index_to_go);
return;
}
}
$self->flush_batch_of_CODE();
return;
} ## end sub end_batch
sub flush_vertical_aligner {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
$vao->flush();
return;
} ## end sub flush_vertical_aligner
# flush is called to output any tokens in the pipeline, so that
# an alternate source of lines can be written in the correct order
sub flush {
my ( $self, $CODE_type_flush ) = @_;
# end the current batch with 1 exception
$index_start_one_line_block = undef;
# Exception: if we are flushing within the code stream only to insert
# blank line(s), then we can keep the batch intact at a weld. This
# improves formatting of -ce. See test 'ce1.ce'
if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
$self->end_batch() if ( $max_index_to_go >= 0 );
}
# otherwise, we have to shut things down completely.
else { $self->flush_batch_of_CODE() }
$self->flush_vertical_aligner();
return;
} ## end sub flush
my %is_assignment_or_fat_comma;
BEGIN {
%is_assignment_or_fat_comma = %is_assignment;
$is_assignment_or_fat_comma{'=>'} = 1;
}
sub add_missing_else {
# Add a missing 'else' block.
# $K_dangling_elsif = index of closing elsif brace not followed by else
my ($self) = @_;
# Make sure everything looks okay
if ( !$K_dangling_elsif
|| $K_dangling_elsif < $K_first
|| $rLL->[$K_dangling_elsif]->[_TYPE_] ne '}' )
{
DEVEL_MODE && Fault("could not find closing elsif brace\n");
}
my $comment = $rOpts->{'add-missing-else-comment'};
# Safety check
if ( substr( $comment, 0, 1 ) ne '#' ) { $comment = '#' . $comment }
# Calculate indentation
my $level = $radjusted_levels->[$K_dangling_elsif];
my $spaces = SPACE x ( $level * $rOpts_indent_columns );
my $line1 = $spaces . "else {\n";
my $line3 = $spaces . "}\n";
$spaces .= SPACE x $rOpts_indent_columns;
my $line2 = $spaces . $comment . "\n";
# clear the output pipeline
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line($line1);
$file_writer_object->write_code_line($line2);
$file_writer_object->write_code_line($line3);
return;
}
sub process_line_of_CODE {
my ( $self, $my_line_of_tokens ) = @_;
#----------------------------------------------------------------
# This routine is called once per INPUT line to format all of the
# tokens on that line.
#----------------------------------------------------------------
# It outputs full-line comments and blank lines immediately.
# For lines of code:
# - Tokens are copied one-by-one from the global token
# array $rLL to a set of '_to_go' arrays which collect batches of
# tokens. This is done with calls to 'store_token_to_go'.
# - A batch is closed and processed upon reaching a well defined
# structural break point (i.e. code block boundary) or forced
# breakpoint (i.e. side comment or special user controls).
# - Subsequent stages of formatting make additional line breaks
# appropriate for lists and logical structures, and as necessary to
# keep line lengths below the requested maximum line length.
#-----------------------------------
# begin initialize closure variables
#-----------------------------------
$line_of_tokens = $my_line_of_tokens;
my $rK_range = $line_of_tokens->{_rK_range};
if ( !defined( $rK_range->[0] ) ) {
# Empty line: This can happen if tokens are deleted, for example
# with the -mangle parameter
return;
}
( $K_first, $K_last ) = @{$rK_range};
$last_CODE_type = $CODE_type;
$CODE_type = $line_of_tokens->{_code_type};
$current_line_starts_in_quote = $line_of_tokens->{_starting_in_quote};
$rLL = $self->[_rLL_];
$radjusted_levels = $self->[_radjusted_levels_];
$rparent_of_seqno = $self->[_rparent_of_seqno_];
$rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
$rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
#---------------------------------
# end initialize closure variables
#---------------------------------
# This flag will become nobreak_to_go and should be set to 2 to prevent
# a line break AFTER the current token.
$no_internal_newlines = 0;
if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
$no_internal_newlines = 2;
}
my $input_line = $line_of_tokens->{_line_text};
my ( $is_block_comment, $has_side_comment );
if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
if ( $K_last == $K_first ) { $is_block_comment = 1 }
else { $has_side_comment = 1 }
}
my $is_static_block_comment_without_leading_space =
$CODE_type eq 'SBCX';
$is_static_block_comment =
$CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
# check for a $VERSION statement
if ( $CODE_type eq 'VER' ) {
$self->[_saw_VERSION_in_this_file_] = 1;
$no_internal_newlines = 2;
}
# Add interline blank if any
my $last_old_nonblank_type = "b";
my $first_new_nonblank_token = EMPTY_STRING;
my $K_first_true = $K_first;
if ( $max_index_to_go >= 0 ) {
$last_old_nonblank_type = $types_to_go[$max_index_to_go];
$first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
if ( !$is_block_comment
&& $types_to_go[$max_index_to_go] ne 'b'
&& $K_first > 0
&& $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
{
$K_first -= 1;
}
}
my $rtok_first = $rLL->[$K_first];
my $in_quote = $line_of_tokens->{_ending_in_quote};
$ending_in_quote = $in_quote;
#------------------------------------
# Handle a block (full-line) comment.
#------------------------------------
if ($is_block_comment) {
if ( $rOpts->{'delete-block-comments'} ) {
$self->flush();
return;
}
$index_start_one_line_block = undef;
$self->end_batch() if ( $max_index_to_go >= 0 );
# output a blank line before block comments
if (
# unless we follow a blank or comment line
$self->[_last_line_leading_type_] ne '#'
&& $self->[_last_line_leading_type_] ne 'b'
# only if allowed
&& $rOpts->{'blanks-before-comments'}
# if this is NOT an empty comment, unless it follows a side
# comment and could become a hanging side comment.
&& (
$rtok_first->[_TOKEN_] ne '#'
|| ( $last_line_had_side_comment
&& $rLL->[$K_first]->[_LEVEL_] > 0 )
)
# not after a short line ending in an opening token
# because we already have space above this comment.
# Note that the first comment in this if block, after
# the 'if (', does not get a blank line because of this.
&& !$self->[_last_output_short_opening_token_]
# never before static block comments
&& !$is_static_block_comment
)
{
$self->flush(); # switching to new output stream
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_blank_code_line();
$self->[_last_line_leading_type_] = 'b';
}
if (
$rOpts->{'indent-block-comments'}
&& ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
{
my $Ktoken_vars = $K_first;
my $rtoken_vars = $rLL->[$Ktoken_vars];
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch();
}
else {
# switching to new output stream
$self->flush();
# Note that last arg in call here is 'undef' for comments
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line(
$rtok_first->[_TOKEN_] . "\n", undef );
$self->[_last_line_leading_type_] = '#';
}
return;
}
#--------------------------------------------
# Compare input/output indentation in logfile
#--------------------------------------------
if ( $self->[_save_logfile_] ) {
my $guessed_indentation_level =
$line_of_tokens->{_guessed_indentation_level};
# Compare input/output indentation except for:
# - hanging side comments
# - continuation lines (have unknown leading blank space)
# - and lines which are quotes (they may have been outdented)
my $exception =
$CODE_type eq 'HSC'
|| $rtok_first->[_CI_LEVEL_] > 0
|| $guessed_indentation_level == 0
&& $rtok_first->[_TYPE_] eq 'Q';
if ( !$exception ) {
my $input_line_number = $line_of_tokens->{_line_number};
$self->compare_indentation_levels( $K_first,
$guessed_indentation_level, $input_line_number );
}
}
#-----------------------------------------
# Handle a line marked as indentation-only
#-----------------------------------------
if ( $CODE_type eq 'IO' ) {
$self->flush();
my $line = $input_line;
# Fix for rt #125506 Unexpected string formatting
# in which leading space of a terminal quote was removed
$line =~ s/\s+$//;
$line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
my $Ktoken_vars = $K_first;
# We work with a copy of the token variables and change the
# first token to be the entire line as a quote variable
my $rtoken_vars = $rLL->[$Ktoken_vars];
$rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
# Patch: length is not really important here but must be defined
$rtoken_vars->[_TOKEN_LENGTH_] = length($line);
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch();
return;
}
#---------------------------
# Handle all other lines ...
#---------------------------
$K_dangling_elsif = 0;
# This is a good place to kill incomplete one-line blocks
if ( $max_index_to_go >= 0 ) {
# For -iob and -lp, mark essential old breakpoints.
# Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
# See related code below.
if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
my $type_first = $rLL->[$K_first_true]->[_TYPE_];
if ( $is_assignment_or_fat_comma{$type_first} ) {
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
}
if (
# this check needed -mangle (for example rt125012)
(
( !$index_start_one_line_block )
&& ( $last_old_nonblank_type eq ';' )
&& ( $first_new_nonblank_token ne '}' )
)
# Patch for RT #98902. Honor request to break at old commas.
|| ( $rOpts_break_at_old_comma_breakpoints
&& $last_old_nonblank_type eq ',' )
)
{
$forced_breakpoint_to_go[$max_index_to_go] = 1
if ($rOpts_break_at_old_comma_breakpoints);
$index_start_one_line_block = undef;
$self->end_batch();
}
# Keep any requested breaks before this line. Note that we have to
# use the original K_first because it may have been reduced above
# to add a blank. The value of the flag is as follows:
# 1 => hard break, flush the batch
# 2 => soft break, set breakpoint and continue building the batch
# added check on max_index_to_go for c177
if ( $max_index_to_go >= 0
&& $self->[_rbreak_before_Kfirst_]->{$K_first_true} )
{
$index_start_one_line_block = undef;
if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
$self->set_forced_breakpoint($max_index_to_go);
}
else {
$self->end_batch();
}
}
}
#--------------------------------------
# loop to process the tokens one-by-one
#--------------------------------------
$self->process_line_inner_loop($has_side_comment);
# if there is anything left in the output buffer ...
if ( $max_index_to_go >= 0 ) {
my $type = $rLL->[$K_last]->[_TYPE_];
my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
# we have to flush ..
if (
# if there is a side comment...
$type eq '#'
# if this line ends in a quote
# NOTE: This is critically important for insuring that quoted
# lines do not get processed by things like -sot and -sct
|| $in_quote
# if this is a VERSION statement
|| $CODE_type eq 'VER'
# to keep a label at the end of a line
|| ( $type eq 'J' && $rOpts_break_after_labels != 2 )
# if we have a hard break request
|| $break_flag && $break_flag != 2
# if we are instructed to keep all old line breaks
|| !$rOpts->{'delete-old-newlines'}
# if this is a line of the form 'use overload'. A break here in
# the input file is a good break because it will allow the
# operators which follow to be formatted well. Without this
# break the formatting with -ci=4 -xci is poor, for example.
# use overload
# '+' => sub {
# print length $_[2], "\n";
# my ( $x, $y ) = _order(@_);
# Number::Roman->new( int $x + $y );
# },
# '-' => sub {
# my ( $x, $y ) = _order(@_);
# Number::Roman->new( int $x - $y );
# };
|| ( $max_index_to_go == 2
&& $types_to_go[0] eq 'k'
&& $tokens_to_go[0] eq 'use'
&& $tokens_to_go[$max_index_to_go] eq 'overload' )
)
{
$index_start_one_line_block = undef;
$self->end_batch();
}
else {
# Check for a soft break request
if ( $break_flag && $break_flag == 2 ) {
$self->set_forced_breakpoint($max_index_to_go);
}
# mark old line breakpoints in current output stream
if (
!$rOpts_ignore_old_breakpoints
# Mark essential old breakpoints if combination -iob -lp is
# used. These two options do not work well together, but
# we can avoid turning -iob off by ignoring -iob at certain
# essential line breaks. See also related code above.
# Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
|| ( $rOpts_line_up_parentheses
&& $is_assignment_or_fat_comma{$type} )
)
{
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
}
}
if ( $K_dangling_elsif && $rOpts_add_missing_else ) {
$self->add_missing_else();
}
return;
} ## end sub process_line_of_CODE
sub process_line_inner_loop {
my ( $self, $has_side_comment ) = @_;
#--------------------------------------------------------------------
# Loop to move all tokens from one input line to a newly forming batch
#--------------------------------------------------------------------
# Do not start a new batch with a blank space
if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
$K_first++;
}
foreach my $Ktoken_vars ( $K_first .. $K_last ) {
my $rtoken_vars = $rLL->[$Ktoken_vars];
#--------------
# handle blanks
#--------------
if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
next;
}
#------------------
# handle non-blanks
#------------------
my $type = $rtoken_vars->[_TYPE_];
# If we are continuing after seeing a right curly brace, flush
# buffer unless we see what we are looking for, as in
# } else ...
if ($rbrace_follower) {
my $token = $rtoken_vars->[_TOKEN_];
if ( !$rbrace_follower->{$token} ) {
$self->end_batch() if ( $max_index_to_go >= 0 );
}
$rbrace_follower = undef;
}
my (
$block_type, $type_sequence,
$is_opening_BLOCK, $is_closing_BLOCK,
$nobreak_BEFORE_BLOCK
);
if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
my $token = $rtoken_vars->[_TOKEN_];
$type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
$block_type = $rblock_type_of_seqno->{$type_sequence};
if ( $block_type
&& $token eq $type
&& $block_type ne 't'
&& !$self->[_rshort_nested_]->{$type_sequence} )
{
if ( $type eq '{' ) {
$is_opening_BLOCK = 1;
$nobreak_BEFORE_BLOCK = $no_internal_newlines;
}
elsif ( $type eq '}' ) {
$is_closing_BLOCK = 1;
$nobreak_BEFORE_BLOCK = $no_internal_newlines;
}
else {
## error - block should be enclosed by curly brace
DEVEL_MODE && Fault(<<EOM);
block type '$block_type' has unexpected container type '$type'
EOM
}
}
}
#---------------------
# handle side comments
#---------------------
if ($has_side_comment) {
# if at last token ...
if ( $Ktoken_vars == $K_last ) {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
next;
}
# if before last token ... do not allow breaks which would
# promote a side comment to a block comment
if ( $Ktoken_vars == $K_last - 1
|| $Ktoken_vars == $K_last - 2
&& $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
{
$no_internal_newlines = 2;
}
}
# Process non-blank and non-comment tokens ...
#-----------------
# handle semicolon
#-----------------
if ( $type eq ';' ) {
my $next_nonblank_token_type = 'b';
my $next_nonblank_token = EMPTY_STRING;
if ( $Ktoken_vars < $K_last ) {
my $Knnb = $Ktoken_vars + 1;
$Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
$next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
$next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
}
if ( $rOpts_break_at_old_semicolon_breakpoints
&& ( $Ktoken_vars == $K_first )
&& $max_index_to_go >= 0
&& !defined($index_start_one_line_block) )
{
$self->end_batch();
}
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch()
if (
!$no_internal_newlines
&& ( !$rOpts_keep_interior_semicolons
|| $Ktoken_vars >= $K_last )
&& ( $next_nonblank_token ne '}' )
);
}
#-----------
# handle '{'
#-----------
elsif ($is_opening_BLOCK) {
# Tentatively output this token. This is required before
# calling starting_one_line_block. We may have to unstore
# it, though, if we have to break before it.
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# Look ahead to see if we might form a one-line block..
my $too_long =
$self->starting_one_line_block( $Ktoken_vars,
$K_last_nonblank_code, $K_last );
$self->clear_breakpoint_undo_stack();
# to simplify the logic below, set a flag to indicate if
# this opening brace is far from the keyword which introduces it
my $keyword_on_same_line = 1;
if (
$max_index_to_go >= 0
&& defined($K_last_nonblank_code)
&& $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
&& ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
|| $too_long )
)
{
$keyword_on_same_line = 0;
}
# Break before '{' if requested with -bl or -bli flag
my $want_break = $self->[_rbrace_left_]->{$type_sequence};
# But do not break if this token is welded to the left
if ( $total_weld_count
&& defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
{
$want_break = 0;
}
# Break BEFORE an opening '{' ...
if (
# if requested
$want_break
# and we were unable to start looking for a block,
&& !defined($index_start_one_line_block)
# or if it will not be on same line as its keyword, so that
# it will be outdented (eval.t, overload.t), and the user
# has not insisted on keeping it on the right
|| ( !$keyword_on_same_line
&& !$rOpts_opening_brace_always_on_right )
)
{
# but only if allowed
if ( !$nobreak_BEFORE_BLOCK ) {
# since we already stored this token, we must unstore it
$self->unstore_token_to_go();
# then output the line
$self->end_batch() if ( $max_index_to_go >= 0 );
# and now store this token at the start of a new line
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
}
# now output this line
$self->end_batch()
if ( $max_index_to_go >= 0 && !$no_internal_newlines );
}
#-----------
# handle '}'
#-----------
elsif ($is_closing_BLOCK) {
my $next_nonblank_token_type = 'b';
my $next_nonblank_token = EMPTY_STRING;
my $Knnb;
if ( $Ktoken_vars < $K_last ) {
$Knnb = $Ktoken_vars + 1;
$Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
$next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
$next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
}
# If there is a pending one-line block ..
if ( defined($index_start_one_line_block) ) {
# Fix for b1208: if a side comment follows this closing
# brace then we must include its length in the length test
# ... unless the -issl flag is set (fixes b1307-1309).
# Assume a minimum of 1 blank space to the comment.
my $added_length = 0;
if ( $has_side_comment
&& !$rOpts_ignore_side_comment_lengths
&& $next_nonblank_token_type eq '#' )
{
$added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
}
# we have to terminate it if..
if (
# it is too long (final length may be different from
# initial estimate). note: must allow 1 space for this
# token
$self->excess_line_length( $index_start_one_line_block,
$max_index_to_go ) + $added_length >= 0
)
{
$index_start_one_line_block = undef;
}
}
# put a break before this closing curly brace if appropriate
$self->end_batch()
if ( $max_index_to_go >= 0
&& !$nobreak_BEFORE_BLOCK
&& !defined($index_start_one_line_block) );
# store the closing curly brace
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# ok, we just stored a closing curly brace. Often, but
# not always, we want to end the line immediately.
# So now we have to check for special cases.
# if this '}' successfully ends a one-line block..
my $one_line_block_type = EMPTY_STRING;
my $keep_going;
if ( defined($index_start_one_line_block) ) {
# Remember the type of token just before the
# opening brace. It would be more general to use
# a stack, but this will work for one-line blocks.
$one_line_block_type =
$types_to_go[$index_start_one_line_block];
# we have to actually make it by removing tentative
# breaks that were set within it
$self->undo_forced_breakpoint_stack(0);
# For -lp, extend the nobreak to include a trailing
# terminal ','. This is because the -lp indentation was
# not known when making one-line blocks, so we may be able
# to move the line back to fit. Otherwise we may create a
# needlessly stranded comma on the next line.
my $iend_nobreak = $max_index_to_go - 1;
if ( $rOpts_line_up_parentheses
&& $next_nonblank_token_type eq ','
&& $Knnb eq $K_last )
{
my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
my $is_excluded =
$self->[_ris_excluded_lp_container_]->{$p_seqno};
$iend_nobreak = $max_index_to_go if ( !$is_excluded );
}
$self->set_nobreaks( $index_start_one_line_block,
$iend_nobreak );
# save starting block indexes so that sub correct_lp can
# check and adjust -lp indentation (c098)
push @{$ri_starting_one_line_block},
$index_start_one_line_block;
# then re-initialize for the next one-line block
$index_start_one_line_block = undef;
# then decide if we want to break after the '}' ..
# We will keep going to allow certain brace followers as in:
# do { $ifclosed = 1; last } unless $losing;
#
# But make a line break if the curly ends a
# significant block:
if (
(
$is_block_without_semicolon{$block_type}
# Follow users break point for
# one line block types U & G, such as a 'try' block
|| $one_line_block_type =~ /^[UG]$/
&& $Ktoken_vars == $K_last
)
# if needless semicolon follows we handle it later
&& $next_nonblank_token ne ';'
)
{
$self->end_batch()
unless ($no_internal_newlines);
}
}
# set string indicating what we need to look for brace follower
# tokens
if ( $is_if_unless_elsif_else{$block_type} ) {
$rbrace_follower = undef;
}
elsif ( $block_type eq 'do' ) {
$rbrace_follower = \%is_do_follower;
if (
$self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
)
{
$rbrace_follower = { ')' => 1 };
}
}
# added eval for borris.t
elsif ($is_sort_map_grep_eval{$block_type}
|| $one_line_block_type eq 'G' )
{
$rbrace_follower = undef;
$keep_going = 1;
}
# anonymous sub
elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
if ($one_line_block_type) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
# Exceptions to help keep -lp intact, see git #74 ...
# Exception 1: followed by '}' on this line
if ( $Ktoken_vars < $K_last
&& $next_nonblank_token eq '}' )
{
$rbrace_follower = undef;
$keep_going = 1;
}
# Exception 2: followed by '}' on next line if -lp set.
# The -lp requirement allows the formatting to follow
# old breaks when -lp is not used, minimizing changes.
# Fixes issue c087.
elsif ($Ktoken_vars == $K_last
&& $rOpts_line_up_parentheses )
{
my $K_closing_container =
$self->[_K_closing_container_];
my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
my $Kc = $K_closing_container->{$p_seqno};
my $is_excluded =
$self->[_ris_excluded_lp_container_]->{$p_seqno};
$keep_going =
( defined($Kc)
&& $rLL->[$Kc]->[_TOKEN_] eq '}'
&& !$is_excluded
&& $Kc - $Ktoken_vars <= 2 );
$rbrace_follower = undef if ($keep_going);
}
else {
## not an exception
}
}
else {
$rbrace_follower = \%is_anon_sub_brace_follower;
}
}
# None of the above: specify what can follow a closing
# brace of a block which is not an
# if/elsif/else/do/sort/map/grep/eval
# Testfiles:
# 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
else {
$rbrace_follower = \%is_other_brace_follower;
}
# See if an elsif block is followed by another elsif or else;
# complain if not.
if ( $block_type eq 'elsif' ) {
# more code on this line ? ( this is unusual )
if ( $next_nonblank_token_type ne 'b'
&& $next_nonblank_token_type ne '#' )
{
# check for 'elsif' or 'else'
if ( !$is_elsif_else{$next_nonblank_token} ) {
write_logfile_entry("(No else block)\n");
# Note that we cannot add a missing else block
# in this case because more code follows the
# closing elsif brace on the same line.
if ( $rOpts_warn_missing_else && !DEVEL_MODE ) {
my $lno =
$rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
warning("$lno: No else block\n");
}
}
}
# no more code on this line, so check on next line
else {
my $K_next = $self->K_next_code($K_last);
if ( !defined($K_next)
|| $rLL->[$K_next]->[_TYPE_] ne 'k'
|| !$is_elsif_else{ $rLL->[$K_next]->[_TOKEN_] } )
{
$K_dangling_elsif = $Ktoken_vars;
write_logfile_entry("(No else block)\n");
if ( $rOpts_warn_missing_else && !DEVEL_MODE ) {
my $lno =
$rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
if ($rOpts_add_missing_else) {
warning(
"$lno: Adding missing else block\n");
}
else {
warning(
"$lno: No else block (use -ame to add one)\n"
);
}
}
}
}
}
# keep going after certain block types (map,sort,grep,eval)
# added eval for borris.t
if ($keep_going) {
# keep going
$rbrace_follower = undef;
}
# if no more tokens, postpone decision until re-entering
elsif ( ( $next_nonblank_token_type eq 'b' )
&& $rOpts_add_newlines )
{
if ( !$rbrace_follower ) {
$self->end_batch()
if (!$no_internal_newlines
&& $max_index_to_go >= 0 );
}
}
elsif ($rbrace_follower) {
if ( $rbrace_follower->{$next_nonblank_token} ) {
# Fix for b1385: keep break after a comma following a
# 'do' block. This could also be used for other block
# types, but that would cause a significant change in
# existing formatting without much benefit.
if ( $next_nonblank_token eq ','
&& $Knnb eq $K_last
&& $block_type eq 'do'
&& $rOpts_add_newlines
&& $self->is_trailing_comma($Knnb) )
{
$self->[_rbreak_after_Klast_]->{$K_last} = 1;
}
}
else {
$self->end_batch()
if (!$no_internal_newlines
&& $max_index_to_go >= 0 );
}
$rbrace_follower = undef;
}
else {
$self->end_batch()
if ( !$no_internal_newlines && $max_index_to_go >= 0 );
}
} ## end treatment of closing block token
#------------------------------
# handle here_doc target string
#------------------------------
elsif ( $type eq 'h' ) {
# no newlines after seeing here-target
$no_internal_newlines = 2;
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
#-----------------------------
# handle all other token types
#-----------------------------
else {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# break after a label if requested
if ( $rOpts_break_after_labels
&& $type eq 'J'
&& $rOpts_break_after_labels == 1 )
{
$self->end_batch()
unless ($no_internal_newlines);
}
}
# remember previous nonblank, non-comment OUTPUT token
$K_last_nonblank_code = $Ktoken_vars;
} ## end of loop over all tokens in this line
return;
} ## end sub process_line_inner_loop
} ## end closure process_line_of_CODE
sub is_trailing_comma {
my ( $self, $KK ) = @_;
# Given:
# $KK - index of a comma in token list
# Return:
# true if the comma at index $KK is a trailing comma
# false if not
my $rLL = $self->[_rLL_];
my $type_KK = $rLL->[$KK]->[_TYPE_];
if ( $type_KK ne ',' ) {
DEVEL_MODE
&& Fault("Bad call: expected type ',' but received '$type_KK'\n");
return;
}
my $Knnb = $self->K_next_nonblank($KK);
if ( defined($Knnb) ) {
my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
return 1;
}
}
return;
} ## end sub is_trailing_comma
sub tight_paren_follows {
my ( $self, $K_to_go_0, $K_ic ) = @_;
# Input parameters:
# $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
# $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
# Return parameter:
# false if we want a break after the closing do brace
# true if we do not want a break after the closing do brace
# We are at the closing brace of a 'do' block. See if this brace is
# followed by a closing paren, and if so, set a flag which indicates
# that we do not want a line break between the '}' and ')'.
# xxxxx ( ...... do { ... } ) {
# ^-------looking at this brace, K_ic
# Subscript notation:
# _i = inner container (braces in this case)
# _o = outer container (parens in this case)
# _io = inner opening = '{'
# _ic = inner closing = '}'
# _oo = outer opening = '('
# _oc = outer closing = ')'
# |--K_oo |--K_oc = outer container
# xxxxx ( ...... do { ...... } ) {
# |--K_io |--K_ic = inner container
# In general, the safe thing to do is return a 'false' value
# if the statement appears to be complex. This will have
# the downstream side-effect of opening up outer containers
# to help make complex code readable. But for simpler
# do blocks it can be preferable to keep the code compact
# by returning a 'true' value.
return unless defined($K_ic);
my $rLL = $self->[_rLL_];
# we should only be called at a closing block
my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
return unless ($seqno_i); # shouldn't happen;
# This only applies if the next nonblank is a ')'
my $K_oc = $self->K_next_nonblank($K_ic);
return unless defined($K_oc);
my $token_next = $rLL->[$K_oc]->[_TOKEN_];
return unless ( $token_next eq ')' );
my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
my $K_io = $self->[_K_opening_container_]->{$seqno_i};
my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
return unless ( defined($K_io) && defined($K_oo) );
# RULE 1: Do not break before a closing signature paren
# (regardless of complexity). This is a fix for issue git#22.
# Looking for something like:
# sub xxx ( ... do { ... } ) {
# ^----- next block_type
my $K_test = $self->K_next_nonblank($K_oc);
if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
if ($seqno_test) {
if ( $self->[_ris_asub_block_]->{$seqno_test}
|| $self->[_ris_sub_block_]->{$seqno_test} )
{
return 1;
}
}
}
# RULE 2: Break if the contents within braces appears to be 'complex'. We
# base this decision on the number of tokens between braces.
# xxxxx ( ... do { ... } ) {
# ^^^^^^
# Although very simple, it has the advantages of (1) being insensitive to
# changes in lengths of identifier names, (2) easy to understand, implement
# and test. A test case for this is 't/snippets/long_line.in'.
# Example: $K_ic - $K_oo = 9 [Pass Rule 2]
# if ( do { $2 !~ /&/ } ) { ... }
# Example: $K_ic - $K_oo = 10 [Pass Rule 2]
# for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
# Example: $K_ic - $K_oo = 20 [Fail Rule 2]
# test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
return if ( $K_ic - $K_io > 16 );
# RULE 3: break if the code between the opening '(' and the '{' is 'complex'
# As with the previous rule, we decide based on the token count
# xxxxx ( ... do { ... } ) {
# ^^^^^^^^
# Example: $K_ic - $K_oo = 9 [Pass Rule 2]
# $K_io - $K_oo = 4 [Pass Rule 3]
# if ( do { $2 !~ /&/ } ) { ... }
# Example: $K_ic - $K_oo = 10 [Pass rule 2]
# $K_io - $K_oo = 9 [Pass rule 3]
# for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
return if ( $K_io - $K_oo > 9 );
# RULE 4: Break if we have already broken this batch of output tokens
return if ( $K_oo < $K_to_go_0 );
# RULE 5: Break if input is not on one line
# For example, we will set the flag for the following expression
# written in one line:
# This has: $K_ic - $K_oo = 10 [Pass rule 2]
# $K_io - $K_oo = 8 [Pass rule 3]
# $self->debug( 'Error: ' . do { local $/; <$err> } );
# but we break after the brace if it is on multiple lines on input, since
# the user may prefer it on multiple lines:
# [Fail rule 5]
# $self->debug(
# 'Error: ' . do { local $/; <$err> }
# );
if ( !$rOpts_ignore_old_breakpoints ) {
my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
return if ( $iline_oo != $iline_oc );
}
# OK to keep the paren tight
return 1;
} ## end sub tight_paren_follows
my %is_brace_semicolon_colon;
BEGIN {
my @q = qw( { } ; : );
@is_brace_semicolon_colon{@q} = (1) x scalar(@q);
}
sub starting_one_line_block {
# After seeing an opening curly brace, look for the closing brace and see
# if the entire block will fit on a line. This routine is not always right
# so a check is made later (at the closing brace) to make sure we really
# have a one-line block. We have to do this preliminary check, though,
# because otherwise we would always break at a semicolon within a one-line
# block if the block contains multiple statements.
# Given:
# $Kj = index of opening brace
# $K_last_nonblank = index of previous nonblank code token
# $K_last = index of last token of input line
# Calls 'create_one_line_block' if one-line block might be formed.
# Also returns a flag '$too_long':
# true = distance from opening keyword to OPENING brace exceeds
# the maximum line length.
# false (simple return) => not too long
# Note that this flag is for distance from the statement start to the
# OPENING brace, not the closing brace.
my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# kill any current block - we can only go 1 deep
create_one_line_block();
my $i_start = 0;
# This routine should not have been called if there are no tokens in the
# 'to_go' arrays of previously stored tokens. A previous call to
# 'store_token_to_go' should have stored an opening brace. An error here
# indicates that a programming change may have caused a flush operation to
# clean out the previously stored tokens.
if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
Fault("program bug: store_token_to_go called incorrectly\n")
if (DEVEL_MODE);
return;
}
# Return if block should be broken
my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
if ( $rbreak_container->{$type_sequence_j} ) {
return;
}
my $ris_bli_container = $self->[_ris_bli_container_];
my $is_bli = $ris_bli_container->{$type_sequence_j};
my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
$block_type = EMPTY_STRING unless ( defined($block_type) );
my $previous_nonblank_token = EMPTY_STRING;
my $i_last_nonblank = -1;
if ( defined($K_last_nonblank) ) {
$i_last_nonblank = $K_last_nonblank - $K_to_go[0];
if ( $i_last_nonblank >= 0 ) {
$previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
}
}
#---------------------------------------------------------------------
# find the starting keyword for this block (such as 'if', 'else', ...)
#---------------------------------------------------------------------
if (
$max_index_to_go == 0
##|| $block_type =~ /^[\{\}\;\:]$/
|| $is_brace_semicolon_colon{$block_type}
|| substr( $block_type, 0, 7 ) eq 'package'
)
{
$i_start = $max_index_to_go;
}
# the previous nonblank token should start these block types
elsif (
$i_last_nonblank >= 0
&& ( $previous_nonblank_token eq $block_type
|| $self->[_ris_asub_block_]->{$type_sequence_j}
|| $self->[_ris_sub_block_]->{$type_sequence_j}
|| substr( $block_type, -2, 2 ) eq '()' )
)
{
$i_start = $i_last_nonblank;
# For signatures and extended syntax ...
# If this brace follows a parenthesized list, we should look back to
# find the keyword before the opening paren because otherwise we might
# form a one line block which stays intact, and cause the parenthesized
# expression to break open. That looks bad.
if ( $tokens_to_go[$i_start] eq ')' ) {
# Find the opening paren
my $K_start = $K_to_go[$i_start];
return unless defined($K_start);
my $seqno = $type_sequence_to_go[$i_start];
return unless ($seqno);
my $K_opening = $K_opening_container->{$seqno};
return if ( !defined($K_opening) );
my $i_opening = $i_start + ( $K_opening - $K_start );
# give up if not on this line
return if ( $i_opening < 0 );
$i_start = $i_opening;
# go back one token before the opening paren
if ( $i_start > 0 ) { $i_start-- }
if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
my $lev = $levels_to_go[$i_start];
if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
}
}
elsif ( $previous_nonblank_token eq ')' ) {
# For something like "if (xxx) {", the keyword "if" will be
# just after the most recent break. This will be 0 unless
# we have just killed a one-line block and are starting another.
# (doif.t)
# Note: cannot use inext_index_to_go[] here because that array
# is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
# Patch to avoid breaking short blocks defined with extended_syntax:
# Strip off any trailing () which was added in the parser to mark
# the opening keyword. For example, in the following
# create( TypeFoo $e) {$bubba}
# the blocktype would be marked as create()
my $stripped_block_type = $block_type;
if ( substr( $block_type, -2, 2 ) eq '()' ) {
$stripped_block_type = substr( $block_type, 0, -2 );
}
if ( $tokens_to_go[$i_start] ne $stripped_block_type ) {
return;
}
}
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
# Note: cannot use inext_index_to_go[] here because that array
# is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
if ( $tokens_to_go[$i_start] ne $block_type ) {
return;
}
}
else {
#-------------------------------------------
# Couldn't find start - return too_long flag
#-------------------------------------------
return 1;
}
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
my $maximum_line_length =
$maximum_line_length_at_level[ $levels_to_go[$i_start] ];
# see if distance to the opening container is too great to even start
if ( $pos > $maximum_line_length ) {
#------------------------------
# too long to the opening token
#------------------------------
return 1;
}
#-----------------------------------------------------------------------
# OK so far: the statement is not to long just to the OPENING token. Now
# see if everything to the closing token will fit on one line
#-----------------------------------------------------------------------
# This is part of an update to fix cases b562 .. b983
my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
return unless ( defined($K_closing) );
my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
$rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
my $excess = $pos + 1 + $container_length - $maximum_line_length;
# Add a small tolerance for welded tokens (case b901)
if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
$excess += 2;
}
if ( $excess > 0 ) {
# line is too long... there is no chance of forming a one line block
# if the excess is more than 1 char
return if ( $excess > 1 );
# ... and give up if it is not a one-line block on input.
# note: for a one-line block on input, it may be possible to keep
# it as a one-line block (by removing a needless semicolon ).
my $K_start = $K_to_go[$i_start];
my $ldiff =
$rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
return if ($ldiff);
}
#------------------------------------------------------------------
# Loop to check contents and length of the potential one-line block
#------------------------------------------------------------------
foreach my $Ki ( $Kj + 1 .. $K_last ) {
# old whitespace could be arbitrarily large, so don't use it
if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
# ignore some small blocks
my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
my $nobreak = $rshort_nested->{$type_sequence_i};
# Return false result if we exceed the maximum line length,
if ( $pos > $maximum_line_length ) {
return;
}
# keep going for non-containers
elsif ( !$type_sequence_i ) {
}
# return if we encounter another opening brace before finding the
# closing brace.
elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
&& $rLL->[$Ki]->[_TYPE_] eq '{'
&& $rblock_type_of_seqno->{$type_sequence_i}
&& !$nobreak )
{
return;
}
# if we find our closing brace..
elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
&& $rLL->[$Ki]->[_TYPE_] eq '}'
&& $rblock_type_of_seqno->{$type_sequence_i}
&& !$nobreak )
{
# be sure any trailing comment also fits on the line
my $Ki_nonblank = $Ki;
if ( $Ki_nonblank < $K_last ) {
$Ki_nonblank++;
if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
&& $Ki_nonblank < $K_last )
{
$Ki_nonblank++;
}
}
# Patch for one-line sort/map/grep/eval blocks with side comments:
# We will ignore the side comment length for sort/map/grep/eval
# because this can lead to statements which change every time
# perltidy is run. Here is an example from Denis Moskowitz which
# oscillates between these two states without this patch:
## --------
## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
## @baz;
##
## grep {
## $_->foo ne 'bar'
## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
## @baz;
## --------
# When the first line is input it gets broken apart by the main
# line break logic in sub process_line_of_CODE.
# When the second line is input it gets recombined by
# process_line_of_CODE and passed to the output routines. The
# output routines (break_long_lines) do not break it apart
# because the bond strengths are set to the highest possible value
# for grep/map/eval/sort blocks, so the first version gets output.
# It would be possible to fix this by changing bond strengths,
# but they are high to prevent errors in older versions of perl.
# See c100 for eval test.
if ( $Ki < $K_last
&& $rLL->[$K_last]->[_TYPE_] eq '#'
&& $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
&& !$rOpts_ignore_side_comment_lengths
&& !$is_sort_map_grep_eval{$block_type}
&& $K_last - $Ki_nonblank <= 2 )
{
# Only include the side comment for if/else/elsif/unless if it
# immediately follows (because the current '$rbrace_follower'
# logic for these will give an immediate brake after these
# closing braces). So for example a line like this
# if (...) { ... } ; # very long comment......
# will already break like this:
# if (...) { ... }
# ; # very long comment......
# so we do not need to include the length of the comment, which
# would break the block. Project 'bioperl' has coding like this.
## !~ /^(if|else|elsif|unless)$/
if ( !$is_if_unless_elsif_else{$block_type}
|| $K_last == $Ki_nonblank )
{
$Ki_nonblank = $K_last;
$pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
if ( $Ki_nonblank > $Ki + 1 ) {
# source whitespace could be anything, assume
# at least one space before the hash on output
if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
$pos += 1;
}
else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
}
if ( $pos >= $maximum_line_length ) {
return;
}
}
}
#--------------------------
# ok, it's a one-line block
#--------------------------
create_one_line_block($i_start);
return;
}
# just keep going for other characters
else {
}
}
#--------------------------------------------------
# End Loop to examine tokens in potential one-block
#--------------------------------------------------
# We haven't hit the closing brace, but there is still space. So the
# question here is, should we keep going to look at more lines in hopes of
# forming a new one-line block, or should we stop right now. The problem
# with continuing is that we will not be able to honor breaks before the
# opening brace if we continue.
# Typically we will want to keep trying to make one-line blocks for things
# like sort/map/grep/eval. But it is not always a good idea to make as
# many one-line blocks as possible, so other types are not done. The user
# can always use -mangle.
# If we want to keep going, we will create a new one-line block.
# The blocks which we can keep going are in a hash, but we never want
# to continue if we are at a '-bli' block.
if ( $want_one_line_block{$block_type} && !$is_bli ) {
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
my $semicolon_count = $rtype_count
&& $rtype_count->{';'} ? $rtype_count->{';'} : 0;
# Ignore a terminal semicolon in the count
if ( $semicolon_count <= 2 ) {
my $K_closing_container = $self->[_K_closing_container_];
my $K_closing_j = $K_closing_container->{$type_sequence_j};
my $Kp = $self->K_previous_nonblank($K_closing_j);
if ( defined($Kp)
&& $rLL->[$Kp]->[_TYPE_] eq ';' )
{
$semicolon_count -= 1;
}
}
if ( $semicolon_count <= 0 ) {
create_one_line_block($i_start);
}
elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
# Mark short broken eval blocks for possible later use in
# avoiding adding spaces before a 'package' line. This is not
# essential but helps keep newer and older formatting the same.
$self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
}
else {
## ok
}
}
return;
} ## end sub starting_one_line_block
sub unstore_token_to_go {
# remove most recent token from output stream
my $self = shift;
if ( $max_index_to_go > 0 ) {
$max_index_to_go--;
}
else {
$max_index_to_go = UNDEFINED_INDEX;
}
return;
} ## end sub unstore_token_to_go
sub compare_indentation_levels {
# Check to see if output line tabbing agrees with input line
# this can be very useful for debugging a script which has an extra
# or missing brace.
my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
return unless ( defined($K_first) );
my $rLL = $self->[_rLL_];
# ignore a line with a leading blank token - issue c195
my $type = $rLL->[$K_first]->[_TYPE_];
return if ( $type eq 'b' );
my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first];
# record max structural depth for log file
if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
$self->[_maximum_BLOCK_level_] = $structural_indentation_level;
$self->[_maximum_BLOCK_level_at_line_] = $line_number;
}
my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
my $is_closing_block =
$type_sequence
&& $self->[_rblock_type_of_seqno_]->{$type_sequence}
&& $type eq '}';
if ( $guessed_indentation_level ne $structural_indentation_level ) {
$self->[_last_tabbing_disagreement_] = $line_number;
if ($is_closing_block) {
if ( !$self->[_in_brace_tabbing_disagreement_] ) {
$self->[_in_brace_tabbing_disagreement_] = $line_number;
}
if ( !$self->[_first_brace_tabbing_disagreement_] ) {
$self->[_first_brace_tabbing_disagreement_] = $line_number;
}
}
if ( !$self->[_in_tabbing_disagreement_] ) {
$self->[_tabbing_disagreement_count_]++;
if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
);
}
$self->[_in_tabbing_disagreement_] = $line_number;
$self->[_first_tabbing_disagreement_] = $line_number
unless ( $self->[_first_tabbing_disagreement_] );
}
}
else {
$self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
if ($in_tabbing_disagreement) {
if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
);
if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
{
write_logfile_entry(
"No further tabbing disagreements will be noted\n");
}
}
$self->[_in_tabbing_disagreement_] = 0;
}
}
return;
} ## end sub compare_indentation_levels
###################################################
# CODE SECTION 8: Utilities for setting breakpoints
###################################################
{ ## begin closure set_forced_breakpoint
my @forced_breakpoint_undo_stack;
# These are global vars for efficiency:
# my $forced_breakpoint_count;
# my $forced_breakpoint_undo_count;
# my $index_max_forced_break;
# Break before or after certain tokens based on user settings
my %break_before_or_after_token;
BEGIN {
# Updated to use all operators. This fixes case b1054
# Here is the previous simplified version:
## my @q = qw( . : ? and or xor && || );
my @q = @all_operators;
push @q, ',';
@break_before_or_after_token{@q} = (1) x scalar(@q);
} ## end BEGIN
sub set_fake_breakpoint {
# Just bump up the breakpoint count as a signal that there are breaks.
# This is useful if we have breaks but may want to postpone deciding
# where to make them.
$forced_breakpoint_count++;
return;
} ## end sub set_fake_breakpoint
use constant DEBUG_FORCE => 0;
sub set_forced_breakpoint {
my ( $self, $i ) = @_;
# Set a breakpoint AFTER the token at index $i in the _to_go arrays.
# Exceptions:
# - If the token at index $i is a blank, backup to $i-1 to
# get to the previous nonblank token.
# - For certain tokens, the break may be placed BEFORE the token
# at index $i, depending on user break preference settings.
# - If a break is made after an opening token, then a break will
# also be made before the corresponding closing token.
# Returns '$i_nonblank':
# = index of the token after which the breakpoint was actually placed
# = undef if breakpoint was not set.
my $i_nonblank;
if ( !defined($i) || $i < 0 ) {
# Calls with bad index $i are harmless but waste time and should
# be caught and eliminated during code development.
if (DEVEL_MODE) {
my ( $a, $b, $c ) = caller();
Fault(
"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
);
}
return;
}
# Break after token $i
$i_nonblank = $self->set_forced_breakpoint_AFTER($i);
# If we break at an opening container..break at the closing
my $set_closing;
if ( defined($i_nonblank)
&& $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
{
$set_closing = 1;
$self->set_closing_breakpoint($i_nonblank);
}
DEBUG_FORCE && do {
my ( $a, $b, $c ) = caller();
my $msg =
"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
if ( !defined($i_nonblank) ) {
$i = EMPTY_STRING unless defined($i);
$msg .= " but could not set break after i='$i'\n";
}
else {
my $nobr = $nobreak_to_go[$i_nonblank];
$nobr = 0 if ( !defined($nobr) );
$msg .= <<EOM;
set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr
EOM
if ( defined($set_closing) ) {
$msg .=
" Also set closing breakpoint corresponding to this token\n";
}
}
print {*STDOUT} $msg;
};
return $i_nonblank;
} ## end sub set_forced_breakpoint
sub set_forced_breakpoint_AFTER {
my ( $self, $i ) = @_;
# This routine is only called by sub set_forced_breakpoint and
# sub set_closing_breakpoint.
# Set a breakpoint AFTER the token at index $i in the _to_go arrays.
# Exceptions:
# - If the token at index $i is a blank, backup to $i-1 to
# get to the previous nonblank token.
# - For certain tokens, the break may be placed BEFORE the token
# at index $i, depending on user break preference settings.
# Returns:
# - the index of the token after which the break was set, or
# - undef if no break was set
return if ( !defined($i) );
return if ( $i < 0 );
# Back up at a blank so we have a token to examine.
# This was added to fix for cases like b932 involving an '=' break.
if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
# Never break between welded tokens
return
if ( $total_weld_count
&& $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
my $token = $tokens_to_go[$i];
my $type = $types_to_go[$i];
# For certain tokens, use user settings to decide if we break before or
# after it
if ( $break_before_or_after_token{$token}
&& ( $type eq $token || $type eq 'k' ) )
{
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
# breaks are forced before 'if' and 'unless'
elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
else {
## ok
}
if ( $i >= 0 && $i <= $max_index_to_go ) {
my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
if ( $i_nonblank >= 0
&& !$nobreak_to_go[$i_nonblank]
&& !$forced_breakpoint_to_go[$i_nonblank] )
{
$forced_breakpoint_to_go[$i_nonblank] = 1;
if ( $i_nonblank > $index_max_forced_break ) {
$index_max_forced_break = $i_nonblank;
}
$forced_breakpoint_count++;
$forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
= $i_nonblank;
# success
return $i_nonblank;
}
}
return;
} ## end sub set_forced_breakpoint_AFTER
sub clear_breakpoint_undo_stack {
my ($self) = @_;
$forced_breakpoint_undo_count = 0;
return;
}
use constant DEBUG_UNDOBP => 0;
sub undo_forced_breakpoint_stack {
my ( $self, $i_start ) = @_;
# Given $i_start, a non-negative index the 'undo stack' of breakpoints,
# remove all breakpoints from the top of the 'undo stack' down to and
# including index $i_start.
# The 'undo stack' is a stack of all breakpoints made for a batch of
# code.
if ( $i_start < 0 ) {
$i_start = 0;
my ( $a, $b, $c ) = caller();
# Bad call, can only be due to a recent programming change.
Fault(
"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
) if (DEVEL_MODE);
return;
}
while ( $forced_breakpoint_undo_count > $i_start ) {
my $i =
$forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
if ( $i >= 0 && $i <= $max_index_to_go ) {
$forced_breakpoint_to_go[$i] = 0;
$forced_breakpoint_count--;
DEBUG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
print {*STDOUT}
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
};
}
# shouldn't happen, but not a critical error
else {
if (DEVEL_MODE) {
my ( $a, $b, $c ) = caller();
Fault(<<EOM);
Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go
EOM
}
}
}
return;
} ## end sub undo_forced_breakpoint_stack
} ## end closure set_forced_breakpoint
{ ## begin closure set_closing_breakpoint
my %postponed_breakpoint;
sub initialize_postponed_breakpoint {
%postponed_breakpoint = ();
return;
}
sub has_postponed_breakpoint {
my ($seqno) = @_;
return $postponed_breakpoint{$seqno};
}
sub set_closing_breakpoint {
# set a breakpoint at a matching closing token
my ( $self, $i_break ) = @_;
if ( defined( $mate_index_to_go[$i_break] ) ) {
# Don't reduce the '2' in the statement below.
# Test files: attrib.t, BasicLyx.pm.html
if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
# break before } ] and ), but sub set_forced_breakpoint will decide
# to break before or after a ? and :
my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
$self->set_forced_breakpoint_AFTER(
$mate_index_to_go[$i_break] - $inc );
}
}
else {
my $type_sequence = $type_sequence_to_go[$i_break];
if ($type_sequence) {
$postponed_breakpoint{$type_sequence} = 1;
}
}
return;
} ## end sub set_closing_breakpoint
} ## end closure set_closing_breakpoint
#########################################
# CODE SECTION 9: Process batches of code
#########################################
{ ## begin closure grind_batch_of_CODE
# The routines in this closure begin the processing of a 'batch' of code.
# A variable to keep track of consecutive nonblank lines so that we can
# insert occasional blanks
my @nonblank_lines_at_depth;
# A variable to remember maximum size of previous batches; this is needed
# by the logical padding routine
my $peak_batch_size;
my $batch_count;
# variables to keep track of indentation of unmatched containers.
my %saved_opening_indentation;
sub initialize_grind_batch_of_CODE {
@nonblank_lines_at_depth = ();
$peak_batch_size = 0;
$batch_count = 0;
%saved_opening_indentation = ();
return;
} ## end sub initialize_grind_batch_of_CODE
# sub grind_batch_of_CODE receives sections of code which are the longest
# possible lines without a break. In other words, it receives what is left
# after applying all breaks forced by blank lines, block comments, side
# comments, pod text, and structural braces. Its job is to break this code
# down into smaller pieces, if necessary, which fit within the maximum
# allowed line length. Then it sends the resulting lines of code on down
# the pipeline to the VerticalAligner package, breaking the code into
# continuation lines as necessary. The batch of tokens are in the "to_go"
# arrays. The name 'grind' is slightly suggestive of a machine continually
# breaking down long lines of code, but mainly it is unique and easy to
# remember and find with an editor search.
# The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
# together in the following way:
# - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
# combines them into the largest sequences of tokens which might form a new
# line.
# - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
# lines.
# So sub 'process_line_of_CODE' builds up the longest possible continuous
# sequences of tokens, regardless of line length, and then
# grind_batch_of_CODE breaks these sequences back down into the new output
# lines.
# Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
use constant DEBUG_GRIND => 0;
sub check_grind_input {
# Check for valid input to sub grind_batch_of_CODE. An error here
# would most likely be due to an error in 'sub store_token_to_go'.
my ($self) = @_;
# Be sure there are tokens in the batch
if ( $max_index_to_go < 0 ) {
Fault(<<EOM);
sub grind incorrectly called with max_index_to_go=$max_index_to_go
EOM
}
my $Klimit = $self->[_Klimit_];
# The local batch tokens must be a continuous part of the global token
# array.
my $KK;
foreach my $ii ( 0 .. $max_index_to_go ) {
my $Km = $KK;
$KK = $K_to_go[$ii];
if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
$KK = '(undef)' unless defined($KK);
Fault(<<EOM);
at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
EOM
}
if ( $ii > 0 && $KK != $Km + 1 ) {
my $im = $ii - 1;
Fault(<<EOM);
Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
EOM
}
}
return;
} ## end sub check_grind_input
# This filter speeds up a critical if-test
my %quick_filter;
BEGIN {
my @q = qw# L { ( [ R ] ) } ? : f => #;
push @q, ',';
@quick_filter{@q} = (1) x scalar(@q);
}
sub grind_batch_of_CODE {
my ($self) = @_;
#-----------------------------------------------------------------
# This sub directs the formatting of one complete batch of tokens.
# The tokens of the batch are in the '_to_go' arrays.
#-----------------------------------------------------------------
my $this_batch = $self->[_this_batch_];
$this_batch->[_peak_batch_size_] = $peak_batch_size;
$this_batch->[_batch_count_] = ++$batch_count;
$self->check_grind_input() if (DEVEL_MODE);
# This routine is only called from sub flush_batch_of_code, so that
# routine is a better spot for debugging.
DEBUG_GRIND && do {
my $token = my $type = EMPTY_STRING;
if ( $max_index_to_go >= 0 ) {
$token = $tokens_to_go[$max_index_to_go];
$type = $types_to_go[$max_index_to_go];
}
my $output_str = EMPTY_STRING;
if ( $max_index_to_go > 20 ) {
my $mm = $max_index_to_go - 10;
$output_str =
join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
. join( EMPTY_STRING,
@tokens_to_go[ $mm .. $max_index_to_go ] );
}
else {
$output_str = join EMPTY_STRING,
@tokens_to_go[ 0 .. $max_index_to_go ];
}
print {*STDOUT} <<EOM;
grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
$output_str
EOM
};
# Remove any trailing blank, which is possible (c192 has example)
if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) {
$max_index_to_go -= 1;
}
return if ( $max_index_to_go < 0 );
my $lp_object_count_this_batch;
if ($rOpts_line_up_parentheses) {
$this_batch->[_lp_object_count_this_batch_] =
$lp_object_count_this_batch = $self->set_lp_indentation();
}
#-----------------------------------------------------------
# Shortcut for block comments. But not for block comments
# with lp because they must use the lp corrector step below.
#-----------------------------------------------------------
if ( !$max_index_to_go
&& $types_to_go[0] eq '#'
&& !$lp_object_count_this_batch )
{
my $ibeg = 0;
$this_batch->[_ri_first_] = [$ibeg];
$this_batch->[_ri_last_] = [$ibeg];
$self->convey_batch_to_vertical_aligner();
my $level = $levels_to_go[$ibeg];
$self->[_last_line_leading_type_] = $types_to_go[$ibeg];
$self->[_last_line_leading_level_] = $level;
$nonblank_lines_at_depth[$level] = 1;
return;
}
#-------------
# Normal route
#-------------
my $rLL = $self->[_rLL_];
#-------------------------------------------------------
# Loop over the batch to initialize some batch variables
#-------------------------------------------------------
my $comma_count_in_batch = 0;
my @colon_list;
my @ix_seqno_controlling_ci;
my %comma_arrow_count;
my $comma_arrow_count_contained = 0;
my @unmatched_closing_indexes_in_this_batch;
my @unmatched_opening_indexes_in_this_batch;
my @i_for_semicolon;
foreach my $i ( 0 .. $max_index_to_go ) {
if ( $types_to_go[$i] eq 'b' ) {
$inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1;
next;
}
$inext_to_go[$i] = $i + 1;
# This is an optional shortcut to save a bit of time by skipping
# most tokens. Note: the filter may need to be updated if the
# next 'if' tests are ever changed to include more token types.
next if ( !$quick_filter{ $types_to_go[$i] } );
my $type = $types_to_go[$i];
# gather info needed by sub break_long_lines
if ( $type_sequence_to_go[$i] ) {
my $seqno = $type_sequence_to_go[$i];
my $token = $tokens_to_go[$i];
# remember indexes of any tokens controlling xci
# in this batch. This list is needed by sub undo_ci.
if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
push @ix_seqno_controlling_ci, $i;
}
if ( $is_opening_sequence_token{$token} ) {
if ( $self->[_rbreak_container_]->{$seqno} ) {
$self->set_forced_breakpoint($i);
}
push @unmatched_opening_indexes_in_this_batch, $i;
if ( $type eq '?' ) {
push @colon_list, $type;
}
}
else { ## $is_closing_sequence_token{$token}
if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) {
$self->set_forced_breakpoint( $i - 1 );
}
my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
if ( defined($i_mate) && $i_mate >= 0 ) {
if ( $type_sequence_to_go[$i_mate] ==
$type_sequence_to_go[$i] )
{
$mate_index_to_go[$i] = $i_mate;
$mate_index_to_go[$i_mate] = $i;
my $cac = $comma_arrow_count{$seqno};
$comma_arrow_count_contained += $cac if ($cac);
}
else {
push @unmatched_opening_indexes_in_this_batch,
$i_mate;
push @unmatched_closing_indexes_in_this_batch, $i;
}
}
else {
push @unmatched_closing_indexes_in_this_batch, $i;
}
if ( $type eq ':' ) {
push @colon_list, $type;
}
}
} ## end if ($seqno)
elsif ( $type eq ',' ) { $comma_count_in_batch++; }
elsif ( $type eq '=>' ) {
if (@unmatched_opening_indexes_in_this_batch) {
my $j = $unmatched_opening_indexes_in_this_batch[-1];
my $seqno = $type_sequence_to_go[$j];
$comma_arrow_count{$seqno}++;
}
}
elsif ( $type eq 'f' ) {
push @i_for_semicolon, $i;
}
else {
## not a special type
}
} ## end for ( my $i = 0 ; $i <=...)
# Break at a single interior C-style for semicolon in this batch (c154)
if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
my $i = $i_for_semicolon[0];
my $inext = $inext_to_go[$i];
if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
$self->set_forced_breakpoint($i);
}
}
my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
@unmatched_closing_indexes_in_this_batch;
if (@unmatched_opening_indexes_in_this_batch) {
$this_batch->[_runmatched_opening_indexes_] =
\@unmatched_opening_indexes_in_this_batch;
}
if (@ix_seqno_controlling_ci) {
$this_batch->[_rix_seqno_controlling_ci_] =
\@ix_seqno_controlling_ci;
}
#------------------------
# Set special breakpoints
#------------------------
# If this line ends in a code block brace, set breaks at any
# previous closing code block braces to breakup a chain of code
# blocks on one line. This is very rare but can happen for
# user-defined subs. For example we might be looking at this:
# BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
my $saw_good_break; # flag to force breaks even if short line
if (
# looking for opening or closing block brace
$block_type_to_go[$max_index_to_go]
# never any good breaks if just one token
&& $max_index_to_go > 0
# but not one of these which are never duplicated on a line:
# until|while|for|if|elsif|else
&& !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
}
)
{
my $lev = $nesting_depth_to_go[$max_index_to_go];
# Walk backwards from the end and
# set break at any closing block braces at the same level.
# But quit if we are not in a chain of blocks.
foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
next if ( $levels_to_go[$i] > $lev ); # skip past higher level
if ( $block_type_to_go[$i] ) {
if ( $tokens_to_go[$i] eq '}' ) {
$self->set_forced_breakpoint($i);
$saw_good_break = 1;
}
}
# quit if we see anything besides words, function, blanks
# at this level
elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
else {
## keep going
}
}
}
#-----------------------------------------------
# insertion of any blank lines before this batch
#-----------------------------------------------
my $imin = 0;
my $imax = $max_index_to_go;
# trim any blank tokens - for safety, but should not be necessary
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
if ( $imin > $imax ) {
if (DEVEL_MODE) {
my $K0 = $K_to_go[0];
my $lno = EMPTY_STRING;
if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
Fault(<<EOM);
Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
EOM
}
return;
}
my $last_line_leading_type = $self->[_last_line_leading_type_];
my $last_line_leading_level = $self->[_last_line_leading_level_];
my $leading_type = $types_to_go[0];
my $leading_level = $levels_to_go[0];
# add blank line(s) before certain key types but not after a comment
if ( $last_line_leading_type ne '#' ) {
my $blank_count = 0;
my $leading_token = $tokens_to_go[0];
# break before certain key blocks except one-liners
if ( $leading_type eq 'k' ) {
if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
$blank_count = $rOpts->{'blank-lines-before-subs'}
if ( terminal_type_i( 0, $max_index_to_go ) ne '}' );
}
# Break before certain block types if we haven't had a
# break at this level for a while. This is the
# difficult decision..
elsif ($last_line_leading_type ne 'b'
&& $is_if_unless_while_until_for_foreach{$leading_token} )
{
my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
if ( !defined($lc) ) { $lc = 0 }
# patch for RT #128216: no blank line inserted at a level
# change
if ( $levels_to_go[0] != $last_line_leading_level ) {
$lc = 0;
}
if ( $rOpts->{'blanks-before-blocks'}
&& $lc >= $rOpts->{'long-block-line-count'}
&& $self->consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
&& terminal_type_i( 0, $max_index_to_go ) ne '}' )
{
$blank_count = 1;
}
}
else {
## no blank
}
}
# blank lines before subs except declarations and one-liners
# Fix for c250: added new type 'P', changed 'i' to 'S'
elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) {
my $special_identifier =
$self->[_ris_special_identifier_token_]->{$leading_token};
if ($special_identifier) {
## $leading_token =~ /$SUB_PATTERN/
if ( $special_identifier eq 'sub' ) {
$blank_count = $rOpts->{'blank-lines-before-subs'}
if ( terminal_type_i( 0, $max_index_to_go ) !~
/^[\;\}\,]$/ );
}
# break before all package declarations
## substr( $leading_token, 0, 8 ) eq 'package '
elsif ( $special_identifier eq 'package' ) {
# ... except in a very short eval block
my $pseqno = $parent_seqno_to_go[0];
$blank_count = $rOpts->{'blank-lines-before-packages'}
if (
!$self->[_ris_short_broken_eval_block_]->{$pseqno}
);
}
else {
DEVEL_MODE && Fault(<<EOM);
Found special identifier '$special_identifier', but expecting 'sub' or 'package'
EOM
}
}
}
# Check for blank lines wanted before a closing brace
elsif ( $leading_token eq '}' ) {
if ( $rOpts->{'blank-lines-before-closing-block'}
&& $block_type_to_go[0]
&& $block_type_to_go[0] =~
/$blank_lines_before_closing_block_pattern/ )
{
my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
if ( $nblanks > $blank_count ) {
$blank_count = $nblanks;
}
}
}
else {
## ok
}
if ($blank_count) {
# future: send blank line down normal path to VerticalAligner?
$self->flush_vertical_aligner();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->require_blank_code_lines($blank_count);
}
}
# update blank line variables and count number of consecutive
# non-blank, non-comment lines at this level
if ( $leading_level == $last_line_leading_level
&& $leading_type ne '#'
&& defined( $nonblank_lines_at_depth[$leading_level] ) )
{
$nonblank_lines_at_depth[$leading_level]++;
}
else {
$nonblank_lines_at_depth[$leading_level] = 1;
}
$self->[_last_line_leading_type_] = $leading_type;
$self->[_last_line_leading_level_] = $leading_level;
#--------------------------
# scan lists and long lines
#--------------------------
# Flag to remember if we called sub 'pad_array_to_go'.
# Some routines (break_lists(), break_long_lines() ) need some
# extra tokens added at the end of the batch. Most batches do not
# use these routines, so we will avoid calling 'pad_array_to_go'
# unless it is needed.
my $called_pad_array_to_go;
# set all forced breakpoints for good list formatting
my $is_long_line;
my $multiple_old_lines_in_batch;
if ( $max_index_to_go > 0 ) {
$is_long_line =
$self->excess_line_length( $imin, $max_index_to_go ) > 0;
my $Kbeg = $K_to_go[0];
my $Kend = $K_to_go[$max_index_to_go];
$multiple_old_lines_in_batch =
$rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
}
# Optional optimization: avoid calling break_lists for a single block
# brace. This is done by turning off the flag $is_unbalanced_batch.
elsif ($is_unbalanced_batch) {
my $block_type = $block_type_to_go[0];
if ( $block_type
&& !$lp_object_count_this_batch
&& $is_block_without_semicolon{$block_type} )
{
# opening blocks can skip break_lists call if no commas in
# container.
if ( $leading_type eq '{' ) {
my $seqno = $type_sequence_to_go[0];
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$seqno};
if ($rtype_count) {
my $comma_count = $rtype_count->{','};
if ( !$comma_count ) {
$is_unbalanced_batch = 0;
}
}
}
# closing block braces can be skipped
else {
$is_unbalanced_batch = 0;
}
}
}
else {
## ok - single token
}
my $rbond_strength_bias = [];
if (
$is_long_line
|| $multiple_old_lines_in_batch
# must always call break_lists() with unbalanced batches because
# it is maintaining some stacks
|| $is_unbalanced_batch
# call break_lists if we might want to break at commas
|| (
$comma_count_in_batch
&& ( $rOpts_maximum_fields_per_table > 0
&& $rOpts_maximum_fields_per_table <= $comma_count_in_batch
|| $rOpts_comma_arrow_breakpoints == 0 )
)
# call break_lists if user may want to break open some one-line
# hash references
|| ( $comma_arrow_count_contained
&& $rOpts_comma_arrow_breakpoints != 3 )
)
{
# add a couple of extra terminal blank tokens
$self->pad_array_to_go();
$called_pad_array_to_go = 1;
my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
$saw_good_break ||= $sgb;
}
# let $ri_first and $ri_last be references to lists of
# first and last tokens of line fragments to output..
my ( $ri_first, $ri_last );
#-----------------------------
# a single token uses one line
#-----------------------------
if ( !$max_index_to_go ) {
$ri_first = [$imin];
$ri_last = [$imax];
}
# for multiple tokens
else {
#-------------------------
# write a single line if..
#-------------------------
if (
(
# this line is 'short'
!$is_long_line
# and we didn't see a good breakpoint
&& !$saw_good_break
# and we don't already have an interior breakpoint
&& !$forced_breakpoint_count
)
# or, we aren't allowed to add any newlines
|| !$rOpts_add_newlines
)
{
$ri_first = [$imin];
$ri_last = [$imax];
}
#-----------------------------
# otherwise use multiple lines
#-----------------------------
else {
# add a couple of extra terminal blank tokens if we haven't
# already done so
$self->pad_array_to_go() unless ($called_pad_array_to_go);
( $ri_first, $ri_last, my $rbond_strength_to_go ) =
$self->break_long_lines( $saw_good_break, \@colon_list,
$rbond_strength_bias );
$self->break_all_chain_tokens( $ri_first, $ri_last );
$self->break_equals( $ri_first, $ri_last )
if @{$ri_first} >= 3;
# now we do a correction step to clean this up a bit
# (The only time we would not do this is for debugging)
$self->recombine_breakpoints( $ri_first, $ri_last,
$rbond_strength_to_go )
if ( $rOpts_recombine && @{$ri_first} > 1 );
$self->insert_final_ternary_breaks( $ri_first, $ri_last )
if (@colon_list);
}
$self->insert_breaks_before_list_opening_containers( $ri_first,
$ri_last )
if ( %break_before_container_types && $max_index_to_go > 0 );
# Check for a phantom semicolon at the end of the batch
if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
$self->unmask_phantom_token($imax);
}
if ( $rOpts_one_line_block_semicolons == 0 ) {
$self->delete_one_line_semicolons( $ri_first, $ri_last );
}
# Remember the largest batch size processed. This is needed by the
# logical padding routine to avoid padding the first nonblank token
if ( $max_index_to_go > $peak_batch_size ) {
$peak_batch_size = $max_index_to_go;
}
}
#-------------------
# -lp corrector step
#-------------------
if ($lp_object_count_this_batch) {
$self->correct_lp_indentation( $ri_first, $ri_last );
}
#--------------------
# ship this batch out
#--------------------
$this_batch->[_ri_first_] = $ri_first;
$this_batch->[_ri_last_] = $ri_last;
$self->convey_batch_to_vertical_aligner();
#-------------------------------------------------------------------
# Write requested number of blank lines after an opening block brace
#-------------------------------------------------------------------
if ($rOpts_blank_lines_after_opening_block) {
my $iterm = $imax;
if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
$iterm -= 1;
if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
$iterm -= 1;
}
}
if ( $types_to_go[$iterm] eq '{'
&& $block_type_to_go[$iterm]
&& $block_type_to_go[$iterm] =~
/$blank_lines_after_opening_block_pattern/ )
{
my $nblanks = $rOpts_blank_lines_after_opening_block;
$self->flush_vertical_aligner();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->require_blank_code_lines($nblanks);
}
}
return;
} ## end sub grind_batch_of_CODE
sub iprev_to_go {
my ($i) = @_;
# Given index $i of a token in the '_to_go' arrays, return
# the index of the previous nonblank token.
return $i - 1 > 0
&& $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1;
}
sub unmask_phantom_token {
my ( $self, $iend ) = @_;
# Turn a phantom token into a real token.
# Input parameter:
# $iend = the index in the output batch array of this token.
# Phantom tokens are specially marked token types (such as ';') with
# no token text which only become real tokens if they occur at the end
# of an output line. At one time phantom ',' tokens were handled
# here, but now they are processed elsewhere.
my $rLL = $self->[_rLL_];
my $KK = $K_to_go[$iend];
my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
my $type = $types_to_go[$iend];
return unless ( $type eq ';' );
my $tok = $type;
my $tok_len = length($tok);
if ( $want_left_space{$type} != WS_NO ) {
$tok = SPACE . $tok;
$tok_len += 1;
}
$tokens_to_go[$iend] = $tok;
$token_lengths_to_go[$iend] = $tok_len;
$rLL->[$KK]->[_TOKEN_] = $tok;
$rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
$self->note_added_semicolon($line_number);
# This changes the summed lengths of the rest of this batch
foreach ( $iend .. $max_index_to_go ) {
$summed_lengths_to_go[ $_ + 1 ] += $tok_len;
}
return;
} ## end sub unmask_phantom_token
sub save_opening_indentation {
# This should be called after each batch of tokens is output. It
# saves indentations of lines of all unmatched opening tokens.
# These will be used by sub get_opening_indentation.
my ( $self, $ri_first, $ri_last, $rindentation_list,
$runmatched_opening_indexes )
= @_;
$runmatched_opening_indexes = []
if ( !defined($runmatched_opening_indexes) );
# QW INDENTATION PATCH 1:
# Also save indentation for multiline qw quotes
my @i_qw;
my $seqno_qw_opening;
if ( $types_to_go[$max_index_to_go] eq 'q' ) {
my $KK = $K_to_go[$max_index_to_go];
$seqno_qw_opening =
$self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
if ($seqno_qw_opening) {
push @i_qw, $max_index_to_go;
}
}
# we need to save indentations of any unmatched opening tokens
# in this batch because we may need them in a subsequent batch.
foreach ( @{$runmatched_opening_indexes}, @i_qw ) {
my $seqno = $type_sequence_to_go[$_];
if ( !$seqno ) {
if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
$seqno = $seqno_qw_opening;
}
else {
# shouldn't happen
$seqno = 'UNKNOWN';
DEVEL_MODE && Fault("unable to find sequence number\n");
}
}
$saved_opening_indentation{$seqno} = [
lookup_opening_indentation(
$_, $ri_first, $ri_last, $rindentation_list
)
];
}
return;
} ## end sub save_opening_indentation
sub get_saved_opening_indentation {
my ($seqno) = @_;
my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
( $indent, $offset, $is_leading ) =
@{ $saved_opening_indentation{$seqno} };
$exists = 1;
}
}
# some kind of serious error it doesn't exist
# (example is badfile.t)
return ( $indent, $offset, $is_leading, $exists );
} ## end sub get_saved_opening_indentation
} ## end closure grind_batch_of_CODE
sub lookup_opening_indentation {
# get the indentation of the line in the current output batch
# which output a selected opening token
#
# given:
# $i_opening - index of an opening token in the current output batch
# whose line indentation we need
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output line
# in this batch
# $rindentation_list - reference to a list containing the indentation
# used for each line. (NOTE: the first slot in
# this list is the last returned line number, and this is
# followed by the list of indentations).
#
# return
# -the indentation of the line which contained token $i_opening
# -and its offset (number of columns) from the start of the line
my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
if ( !@{$ri_last} ) {
# An error here implies a bug introduced by a recent program change.
# Every batch of code has lines, so this should never happen.
if (DEVEL_MODE) {
Fault("Error in opening_indentation: no lines");
}
return ( 0, 0, 0 );
}
my $nline = $rindentation_list->[0]; # line number of previous lookup
# reset line location if necessary
$nline = 0 if ( $i_opening < $ri_start->[$nline] );
# find the correct line
if ( $i_opening <= $ri_last->[-1] ) {
while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
}
# Error - token index is out of bounds - shouldn't happen
# A program bug has been introduced in one of the calling routines.
# We better stop here.
else {
my $i_last_line = $ri_last->[-1];
if (DEVEL_MODE) {
Fault(<<EOM);
Program bug in call to lookup_opening_indentation - index out of range
called with index i_opening=$i_opening > $i_last_line = max index of last line
This batch has max index = $max_index_to_go,
EOM
}
$nline = $#{$ri_last};
}
$rindentation_list->[0] =
$nline; # save line number to start looking next call
my $ibeg = $ri_start->[$nline];
my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
my $is_leading = ( $ibeg == $i_opening );
return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
} ## end sub lookup_opening_indentation
sub terminal_type_i {
# returns type of last token on this line (terminal token), as follows:
# returns # for a full-line comment
# returns ' ' for a blank line
# otherwise returns final token type
my ( $ibeg, $iend ) = @_;
# Start at the end and work backwards
my $i = $iend;
my $type_i = $types_to_go[$i];
# Check for side comment
if ( $type_i eq '#' ) {
$i--;
if ( $i < $ibeg ) {
return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
$type_i = $types_to_go[$i];
}
# Skip past a blank
if ( $type_i eq 'b' ) {
$i--;
if ( $i < $ibeg ) {
return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
$type_i = $types_to_go[$i];
}
# Found it..make sure it is a BLOCK termination,
# but hide a terminal } after sort/map/grep/eval/do because it is not
# necessarily the end of the line. (terminal.t)
my $block_type = $block_type_to_go[$i];
if (
$type_i eq '}'
&& ( !$block_type
|| $is_sort_map_grep_eval_do{$block_type} )
)
{
$type_i = 'b';
}
return wantarray ? ( $type_i, $i ) : $type_i;
} ## end sub terminal_type_i
sub pad_array_to_go {
# To simplify coding in break_lists and set_bond_strengths, it helps to
# create some extra blank tokens at the end of the arrays. We also add
# some undef's to help guard against using invalid data.
my ($self) = @_;
$K_to_go[ $max_index_to_go + 1 ] = undef;
$tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
$tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
$tokens_to_go[ $max_index_to_go + 3 ] = undef;
$types_to_go[ $max_index_to_go + 1 ] = 'b';
$types_to_go[ $max_index_to_go + 2 ] = 'b';
$types_to_go[ $max_index_to_go + 3 ] = undef;
$nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
$nesting_depth_to_go[ $max_index_to_go + 1 ] =
$nesting_depth_to_go[$max_index_to_go];
# /^[R\}\)\]]$/
if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
# Nesting depths are set to be >=0 in sub write_line, so it should
# not be possible to get here unless the code has a bracing error
# which leaves a closing brace with zero nesting depth.
if ( !get_saw_brace_error() ) {
if (DEVEL_MODE) {
Fault(<<EOM);
Program bug in pad_array_to_go: hit nesting error which should have been caught
EOM
}
}
}
else {
$nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
}
}
# /^[L\{\(\[]$/
elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
$nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
}
else {
## must be ? or :
}
return;
} ## end sub pad_array_to_go
sub break_all_chain_tokens {
# scan the current breakpoints looking for breaks at certain "chain
# operators" (. : && || + etc) which often occur repeatedly in a long
# statement. If we see a break at any one, break at all similar tokens
# within the same container.
#
my ( $self, $ri_left, $ri_right ) = @_;
my %saw_chain_type;
my %left_chain_type;
my %right_chain_type;
my %interior_chain_type;
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $count = 0;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
$typel = '+' if ( $typel eq '-' ); # treat + and - the same
$typer = '+' if ( $typer eq '-' );
$typel = '*' if ( $typel eq '/' ); # treat * and / the same
$typer = '*' if ( $typer eq '/' );
my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
next if ( $typel eq '?' );
push @{ $left_chain_type{$keyl} }, $il;
$saw_chain_type{$keyl} = 1;
$count++;
}
if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
next if ( $typer eq '?' );
push @{ $right_chain_type{$keyr} }, $ir;
$saw_chain_type{$keyr} = 1;
$count++;
}
}
return unless $count;
# now look for any interior tokens of the same types
$count = 0;
my $has_interior_dot_or_plus;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
foreach my $i ( $il + 1 .. $ir - 1 ) {
my $type = $types_to_go[$i];
my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
$key = '+' if ( $key eq '-' );
$key = '*' if ( $key eq '/' );
if ( $saw_chain_type{$key} ) {
push @{ $interior_chain_type{$key} }, $i;
$count++;
$has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
}
}
}
return unless $count;
my @keys = keys %saw_chain_type;
# quit if just ONE continuation line with leading . For example--
# print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
# . $contents;
# Fixed for b1399.
if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
return;
}
# now make a list of all new break points
my @insert_list;
# loop over all chain types
foreach my $key (@keys) {
# loop over all interior chain tokens
foreach my $itest ( @{ $interior_chain_type{$key} } ) {
# loop over all left end tokens of same type
if ( $left_chain_type{$key} ) {
next if $nobreak_to_go[ $itest - 1 ];
foreach my $i ( @{ $left_chain_type{$key} } ) {
next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest - 1;
# Break at matching ? if this : is at a different level.
# For example, the ? before $THRf_DEAD in the following
# should get a break if its : gets a break.
#
# my $flags =
# ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
# : ( $_ & 4 ) ? $THRf_R_DETACHED
# : $THRf_R_JOINABLE;
if ( $key eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( defined($i_question) && $i_question > 0 ) {
push @insert_list, $i_question - 1;
}
}
last;
}
}
# loop over all right end tokens of same type
if ( $right_chain_type{$key} ) {
next if $nobreak_to_go[$itest];
foreach my $i ( @{ $right_chain_type{$key} } ) {
next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest;
# break at matching ? if this : is at a different level
if ( $key eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( defined($i_question) ) {
push @insert_list, $i_question;
}
}
last;
}
}
}
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub break_all_chain_tokens
sub insert_additional_breaks {
# this routine will add line breaks at requested locations after
# sub break_long_lines has made preliminary breaks.
my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
my $i_f;
my $i_l;
my $line_number = 0;
foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
next if ( $nobreak_to_go[$i_break_left] );
$i_f = $ri_first->[$line_number];
$i_l = $ri_last->[$line_number];
while ( $i_break_left >= $i_l ) {
$line_number++;
# shouldn't happen unless caller passes bad indexes
if ( $line_number >= @{$ri_last} ) {
if (DEVEL_MODE) {
Fault(<<EOM);
Non-fatal program bug: couldn't set break at $i_break_left
EOM
}
return;
}
$i_f = $ri_first->[$line_number];
$i_l = $ri_last->[$line_number];
}
# Do not leave a blank at the end of a line; back up if necessary
if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
my $i_break_right = $inext_to_go[$i_break_left];
if ( $i_break_left >= $i_f
&& $i_break_left < $i_l
&& $i_break_right > $i_f
&& $i_break_right <= $i_l )
{
splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
}
}
return;
} ## end sub insert_additional_breaks
{ ## begin closure in_same_container_i
my $ris_break_token;
my $ris_comma_token;
BEGIN {
# all cases break on seeing commas at same level
my @q = qw( => );
push @q, ',';
@{$ris_comma_token}{@q} = (1) x scalar(@q);
# Non-ternary text also breaks on seeing any of qw(? : || or )
# Example: we would not want to break at any of these .'s
# : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
push @q, qw( or || ? : );
@{$ris_break_token}{@q} = (1) x scalar(@q);
} ## end BEGIN
sub in_same_container_i {
# Check to see if tokens at i1 and i2 are in the same container, and
# not separated by certain characters: => , ? : || or
# This is an interface between the _to_go arrays to the rLL array
my ( $self, $i1, $i2 ) = @_;
# quick check
my $parent_seqno_1 = $parent_seqno_to_go[$i1];
return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
my $K1 = $K_to_go[$i1];
my $K2 = $K_to_go[$i2];
my $rLL = $self->[_rLL_];
my $depth_1 = $nesting_depth_to_go[$i1];
return if ( $depth_1 < 0 );
# Shouldn't happen since i1 and i2 have same parent:
return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
# Select character set to scan for
my $type_1 = $types_to_go[$i1];
my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
# Fast preliminary loop to verify that tokens are in the same container
my $KK = $K1;
while (1) {
$KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
last if !defined($KK);
last if ( $KK >= $K2 );
my $ii = $i1 + $KK - $K1;
my $depth_i = $nesting_depth_to_go[$ii];
return if ( $depth_i < $depth_1 );
next if ( $depth_i > $depth_1 );
if ( $type_1 ne ':' ) {
my $tok_i = $tokens_to_go[$ii];
return if ( $tok_i eq '?' || $tok_i eq ':' );
}
}
# Slow loop checking for certain characters
#-----------------------------------------------------
# This is potentially a slow routine and not critical.
# For safety just give up for large differences.
# See test file 'infinite_loop.txt'
#-----------------------------------------------------
return if ( $i2 - $i1 > 200 );
foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
my $depth_i = $nesting_depth_to_go[$ii];
next if ( $depth_i > $depth_1 );
return if ( $depth_i < $depth_1 );
my $tok_i = $tokens_to_go[$ii];
return if ( $rbreak->{$tok_i} );
}
return 1;
} ## end sub in_same_container_i
} ## end closure in_same_container_i
sub break_equals {
# Look for assignment operators that could use a breakpoint.
# For example, in the following snippet
#
# $HOME = $ENV{HOME}
# || $ENV{LOGDIR}
# || $pw[7]
# || die "no home directory for user $<";
#
# we could break at the = to get this, which is a little nicer:
# $HOME =
# $ENV{HOME}
# || $ENV{LOGDIR}
# || $pw[7]
# || die "no home directory for user $<";
#
# The logic here follows the logic in set_logical_padding, which
# will add the padding in the second line to improve alignment.
#
my ( $self, $ri_left, $ri_right ) = @_;
my $nmax = @{$ri_right} - 1;
return if ( $nmax < 2 );
# scan the left ends of first two lines
my $tokbeg = EMPTY_STRING;
my $depth_beg;
for my $n ( 1 .. 2 ) {
my $il = $ri_left->[$n];
my $typel = $types_to_go[$il];
my $tokenl = $tokens_to_go[$il];
my $keyl = $typel eq 'k' ? $tokenl : $typel;
my $has_leading_op = $is_chain_operator{$keyl};
return unless ($has_leading_op);
if ( $n > 1 ) {
return
unless ( $tokenl eq $tokbeg
&& $nesting_depth_to_go[$il] eq $depth_beg );
}
$tokbeg = $tokenl;
$depth_beg = $nesting_depth_to_go[$il];
}
# now look for any interior tokens of the same types
my $il = $ri_left->[0];
my $ir = $ri_right->[0];
# now make a list of all new break points
my @insert_list;
foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
my $type = $types_to_go[$i];
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
if ( $want_break_before{$type} ) {
push @insert_list, $i - 1;
}
else {
push @insert_list, $i;
}
}
}
# Break after a 'return' followed by a chain of operators
# return ( $^O !~ /win32|dos/i )
# && ( $^O ne 'VMS' )
# && ( $^O ne 'OS2' )
# && ( $^O ne 'MacOS' );
# To give:
# return
# ( $^O !~ /win32|dos/i )
# && ( $^O ne 'VMS' )
# && ( $^O ne 'OS2' )
# && ( $^O ne 'MacOS' );
my $i = 0;
if ( $types_to_go[$i] eq 'k'
&& $tokens_to_go[$i] eq 'return'
&& $ir > $il
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
push @insert_list, $i;
}
return unless (@insert_list);
# One final check...
# scan second and third lines and be sure there are no assignments
# we want to avoid breaking at an = to make something like this:
# unless ( $icon =
# $html_icons{"$type-$state"}
# or $icon = $html_icons{$type}
# or $icon = $html_icons{$state} )
for my $n ( 1 .. 2 ) {
my $il_n = $ri_left->[$n];
my $ir_n = $ri_right->[$n];
foreach my $i ( $il_n + 1 .. $ir_n ) {
my $type = $types_to_go[$i];
return
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg );
}
}
# ok, insert any new break point
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub break_equals
{ ## begin closure recombine_breakpoints
# This routine is called once per batch to see if it would be better
# to combine some of the lines into which the batch has been broken.
my %is_amp_amp;
my %is_math_op;
my %is_plus_minus;
my %is_mult_div;
BEGIN {
my @q;
@q = qw( && || );
@is_amp_amp{@q} = (1) x scalar(@q);
@q = qw( + - * / );
@is_math_op{@q} = (1) x scalar(@q);
@q = qw( + - );
@is_plus_minus{@q} = (1) x scalar(@q);
@q = qw( * / );
@is_mult_div{@q} = (1) x scalar(@q);
} ## end BEGIN
sub Debug_dump_breakpoints {
# Debug routine to dump current breakpoints...not normally called
# We are given indexes to the current lines:
# $ri_beg = ref to array of BEGinning indexes of each line
# $ri_end = ref to array of ENDing indexes of each line
my ( $self, $ri_beg, $ri_end, $msg ) = @_;
print {*STDOUT} "----Dumping breakpoints from: $msg----\n";
for my $n ( 0 .. @{$ri_end} - 1 ) {
my $ibeg = $ri_beg->[$n];
my $iend = $ri_end->[$n];
my $text = EMPTY_STRING;
foreach my $i ( $ibeg .. $iend ) {
$text .= $tokens_to_go[$i];
}
print {*STDOUT} "$n ($ibeg:$iend) $text\n";
}
print {*STDOUT} "----\n";
return;
} ## end sub Debug_dump_breakpoints
sub delete_one_line_semicolons {
my ( $self, $ri_beg, $ri_end ) = @_;
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
# Walk down the lines of this batch and delete any semicolons
# terminating one-line blocks;
my $nmax = @{$ri_end} - 1;
foreach my $n ( 0 .. $nmax ) {
my $i_beg = $ri_beg->[$n];
my $i_e = $ri_end->[$n];
my $K_beg = $K_to_go[$i_beg];
my $K_e = $K_to_go[$i_e];
my $K_end = $K_e;
my $type_end = $rLL->[$K_end]->[_TYPE_];
if ( $type_end eq '#' ) {
$K_end = $self->K_previous_nonblank($K_end);
if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
}
# we are looking for a line ending in closing brace
next
unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
# ...and preceded by a semicolon on the same line
my $K_semicolon = $self->K_previous_nonblank($K_end);
next unless defined($K_semicolon);
my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
next if ( $i_semicolon <= $i_beg );
next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
# Safety check - shouldn't happen - not critical
# This is not worth throwing a Fault, except in DEVEL_MODE
if ( $types_to_go[$i_semicolon] ne ';' ) {
DEVEL_MODE
&& Fault("unexpected type looking for semicolon");
next;
}
# ... with the corresponding opening brace on the same line
my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
my $K_opening = $K_opening_container->{$type_sequence};
next unless ( defined($K_opening) );
my $i_opening = $i_beg + ( $K_opening - $K_beg );
next if ( $i_opening < $i_beg );
# ... and only one semicolon between these braces
my $semicolon_count = 0;
foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
$semicolon_count++;
last;
}
}
next if ($semicolon_count);
# ...ok, then make the semicolon invisible
my $len = $token_lengths_to_go[$i_semicolon];
$tokens_to_go[$i_semicolon] = EMPTY_STRING;
$token_lengths_to_go[$i_semicolon] = 0;
$rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
$rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
foreach ( $i_semicolon .. $max_index_to_go ) {
$summed_lengths_to_go[ $_ + 1 ] -= $len;
}
}
return;
} ## end sub delete_one_line_semicolons
use constant DEBUG_RECOMBINE => 0;
sub recombine_breakpoints {
my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
# This sub implements the 'recombine' operation on a batch.
# Its task is to combine some of these lines back together to
# improve formatting. The need for this arises because
# sub 'break_long_lines' is very liberal in setting line breaks
# for long lines, always setting breaks at good breakpoints, even
# when that creates small lines. Sometimes small line fragments
# are produced which would look better if they were combined.
# Input parameters:
# $ri_beg = ref to array of BEGinning indexes of each line
# $ri_end = ref to array of ENDing indexes of each line
# $rbond_strength_to_go = array of bond strengths pulling
# tokens together, used to decide where best to recombine lines.
#-------------------------------------------------------------------
# Do nothing under extreme stress; use <= 2 for c171.
# (NOTE: New optimizations make this unnecessary. But removing this
# check is not really useful because this condition only occurs in
# test runs, and another formatting pass will fix things anyway.)
# This routine has a long history of improvements. Some past
# relevant issues are : c118, c167, c171, c186, c187, c193, c200.
#-------------------------------------------------------------------
return if ( $high_stress_level <= 2 );
my $nmax_start = @{$ri_end} - 1;
return if ( $nmax_start <= 0 );
my $iend_max = $ri_end->[$nmax_start];
if ( $types_to_go[$iend_max] eq '#' ) {
$iend_max = iprev_to_go($iend_max);
}
my $has_terminal_semicolon =
$iend_max >= 0 && $types_to_go[$iend_max] eq ';';
#--------------------------------------------------------------------
# Break into the smallest possible sub-sections to improve efficiency
#--------------------------------------------------------------------
# Also make a list of all good joining tokens between the lines
# n-1 and n.
my @joint;
my $rsections = [];
my $nbeg_sec = 0;
my $nend_sec;
my $nmax_section = 0;
foreach my $nn ( 1 .. $nmax_start ) {
my $ibeg_1 = $ri_beg->[ $nn - 1 ];
my $iend_1 = $ri_end->[ $nn - 1 ];
my $iend_2 = $ri_end->[$nn];
my $ibeg_2 = $ri_beg->[$nn];
# Define certain good joint tokens
my ( $itok, $itokp, $itokm );
foreach my $itest ( $iend_1, $ibeg_2 ) {
my $type = $types_to_go[$itest];
if ( $is_math_op{$type}
|| $is_amp_amp{$type}
|| $is_assignment{$type}
|| $type eq ':' )
{
$itok = $itest;
}
}
# joint[$nn] = index of joint character
$joint[$nn] = $itok;
# Update the section list
my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
if (
$excess <= 1
# The number 5 here is an arbitrary small number intended
# to keep most small matches in one sub-section.
|| ( defined($nend_sec)
&& ( $nn < 5 || $nmax_start - $nn < 5 ) )
)
{
$nend_sec = $nn;
}
else {
if ( defined($nend_sec) ) {
push @{$rsections}, [ $nbeg_sec, $nend_sec ];
my $num = $nend_sec - $nbeg_sec;
if ( $num > $nmax_section ) { $nmax_section = $num }
$nbeg_sec = $nn;
$nend_sec = undef;
}
$nbeg_sec = $nn;
}
}
if ( defined($nend_sec) ) {
push @{$rsections}, [ $nbeg_sec, $nend_sec ];
my $num = $nend_sec - $nbeg_sec;
if ( $num > $nmax_section ) { $nmax_section = $num }
}
my $num_sections = @{$rsections};
if ( DEBUG_RECOMBINE > 1 ) {
print {*STDOUT} <<EOM;
sections=$num_sections; nmax_sec=$nmax_section
EOM
}
if ( DEBUG_RECOMBINE > 0 ) {
my $max = 0;
print {*STDOUT}
"-----\n$num_sections sections found for nmax=$nmax_start\n";
foreach my $sect ( @{$rsections} ) {
my ( $nbeg, $nend ) = @{$sect};
my $num = $nend - $nbeg;
if ( $num > $max ) { $max = $num }
print {*STDOUT} "$nbeg $nend\n";
}
print {*STDOUT} "max size=$max of $nmax_start lines\n";
}
# Loop over all sub-sections. Note that we have to work backwards
# from the end of the batch since the sections use original line
# numbers, and the line numbers change as we go.
while ( my $section = pop @{$rsections} ) {
my ( $nbeg, $nend ) = @{$section};
$self->recombine_section_loop(
{
_ri_beg => $ri_beg,
_ri_end => $ri_end,
_nbeg => $nbeg,
_nend => $nend,
_rjoint => \@joint,
_rbond_strength_to_go => $rbond_strength_to_go,
_has_terminal_semicolon => $has_terminal_semicolon,
}
);
}
return;
} ## end sub recombine_breakpoints
sub recombine_section_loop {
my ( $self, $rhash ) = @_;
# Recombine breakpoints for one section of lines in the current batch
# Given:
# $ri_beg, $ri_end = ref to arrays with token indexes of the first
# and last line
# $nbeg, $nend = line numbers bounding this section
# $rjoint = ref to array of good joining tokens per line
# Update: $ri_beg, $ri_end, $rjoint if lines are joined
# Returns:
# nothing
#-------------
# Definitions:
#-------------
# $rhash = {
# _ri_beg = ref to array with starting token index by line
# _ri_end = ref to array with ending token index by line
# _nbeg = first line number of this section
# _nend = last line number of this section
# _rjoint = ref to array of good joining tokens for each line
# _rbond_strength_to_go = array of bond strengths
# _has_terminal_semicolon = true if last line of batch has ';'
# _num_freeze = fixed number of lines at end of this batch
# _optimization_on = true during final optimization loop
# _num_compares = total number of line compares made so far
# _pair_list = list of line pairs in optimal search order
# };
#-------------
# How it works
#-------------
# We are working with a sequence of output lines and looking at
# each pair. We must decide if it is better to join each of
# these line pairs.
# The brute force method is to loop through all line pairs and
# join the best possible pair, as determined by either some
# logical criterion or by the maximum 'bond strength' assigned
# to the joining token. Then keep doing this until there are
# no remaining line pairs to join.
# This works, but a problem is that it can theoretically take
# on the order of N^2 comparisons in some pathological cases.
# This can require an excessive amount of run time.
# We can avoid excessive run time by conceptually dividing the
# work into two phases. In the first phase we make any joints
# required by user settings or logic other than the strength of
# joints. In the second phase we make any remaining joints
# based on strengths. To do this optimally, we do a preliminary
# sort on joint strengths and always loop in that order. That
# way, we can stop a search on the first joint strength because
# it will be the maximum.
# This method is very fast, requiring no more than 3*N line
# comparisons, where N is the number of lines (see below).
my $ri_beg = $rhash->{_ri_beg};
my $ri_end = $rhash->{_ri_end};
# Line index range of this section:
my $nbeg = $rhash->{_nbeg}; # stays constant
my $nend = $rhash->{_nend}; # will decrease
# $nmax_batch = starting number of lines in the full batch
# $num_freeze = number of lines following this section to leave alone
my $nmax_batch = @{$ri_end} - 1;
$rhash->{_num_freeze} = $nmax_batch - $nend;
# Setup the list of line pairs to test. This stores the following
# values for each line pair:
# [ $n=index of the second line of the pair, $bs=bond strength]
my @pair_list;
my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
foreach my $n ( $nbeg + 1 .. $nend ) {
my $iend_1 = $ri_end->[ $n - 1 ];
my $ibeg_2 = $ri_beg->[$n];
my $bs_tweak = 0;
if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 }
my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
push @pair_list, [ $n, $bs ];
}
# Any order for testing is possible, but optimization is only possible
# if we sort the line pairs on decreasing joint strength.
@pair_list =
sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list;
$rhash->{_rpair_list} = \@pair_list;
#----------------
# Iteration limit
#----------------
# This is now a very fast loop which runs in O(n) time, but a
# check on total number of iterations is retained to guard
# against future programming errors.
# Most cases require roughly 1 comparison per line pair (1 full pass).
# The upper bound is estimated to be about 3 comparisons per line pair
# unless optimization is deactivated. The approximate breakdown is:
# 1 pass with 1 compare per joint to do any special cases, plus
# 1 pass with up to 2 compares per joint in optimization mode
# The most extreme cases in my collection are:
# camel1.t - needs 2.7 compares per line (12 without optimization)
# ternary.t - needs 2.8 compares per line (12 without optimization)
# c206 - needs 3.3 compares per line, found with random testing
# So a value of MAX_COMPARE_RATIO = 4 looks like an upper bound as
# long as optimization is used. A value of 20 should allow all code to
# pass even if optimization is turned off for testing.
use constant MAX_COMPARE_RATIO => DEVEL_MODE ? 4 : 20;
my $num_pairs = $nend - $nbeg + 1;
my $max_compares = MAX_COMPARE_RATIO * $num_pairs;
# Always start with optimization off
$rhash->{_num_compares} = 0;
$rhash->{_optimization_on} = 0;
$rhash->{_ix_best_last} = 0;
#--------------------------------------------
# loop until there are no more recombinations
#--------------------------------------------
my $nmax_last = $nmax_batch + 1;
while (1) {
# Stop when the number of lines in the batch does not decrease
$nmax_batch = @{$ri_end} - 1;
if ( $nmax_batch >= $nmax_last ) {
last;
}
$nmax_last = $nmax_batch;
#-----------------------------------------
# inner loop to find next best combination
#-----------------------------------------
$self->recombine_inner_loop($rhash);
# Iteration limit check:
if ( $rhash->{_num_compares} > $max_compares ) {
# See note above; should only get here on a programming error
if (DEVEL_MODE) {
my $ibeg = $ri_beg->[$nbeg];
my $Kbeg = $K_to_go[$ibeg];
my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_];
Fault(<<EOM);
inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno
EOM
}
last;
}
} ## end iteration loop
if (DEBUG_RECOMBINE) {
my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs;
print {*STDOUT}
"exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n";
}
return;
} ## end sub recombine_section_loop
sub recombine_inner_loop {
my ( $self, $rhash ) = @_;
# This is the inner loop of the recombine operation. We look at all of
# the remaining joints in this section and select the best joint to be
# recombined. If a recombination is made, the number of lines
# in this section will be reduced by one.
# Returns: nothing
my $rK_weld_right = $self->[_rK_weld_right_];
my $rK_weld_left = $self->[_rK_weld_left_];
my $ri_beg = $rhash->{_ri_beg};
my $ri_end = $rhash->{_ri_end};
my $nbeg = $rhash->{_nbeg};
my $rjoint = $rhash->{_rjoint};
my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
my $rpair_list = $rhash->{_rpair_list};
# This will remember the best joint:
my $n_best = 0;
my $bs_best = 0.;
my $ix_best = 0;
my $num_bs = 0;
# The range of lines in this group is $nbeg to $nstop
my $nmax = @{$ri_end} - 1;
my $nstop = $nmax - $rhash->{_num_freeze};
my $num_joints = $nstop - $nbeg;
# Turn off optimization if just two joints remain to allow
# special two-line logic to be checked (c193)
if ( $rhash->{_optimization_on} && $num_joints <= 2 ) {
$rhash->{_optimization_on} = 0;
}
# Start where we ended the last search
my $ix_start = $rhash->{_ix_best_last};
# Keep the starting index in bounds
$ix_start = max( 0, $ix_start );
# Make a search order list which cycles around to visit
# all line pairs.
my $ix_max = @{$rpair_list} - 1;
my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 );
my $ix_last = $ix_list[-1];
#-------------------------
# loop over all line pairs
#-------------------------
my $incomplete_loop;
foreach my $ix (@ix_list) {
my $item = $rpair_list->[$ix];
my ( $n, $bs ) = @{$item};
# This flag will be true if we 'last' out of this loop early.
# We cannot turn on optimization if this is true.
$incomplete_loop = $ix != $ix_last;
# Update the count of the number of times through this inner loop
$rhash->{_num_compares}++;
#----------------------------------------------------------
# If we join the current pair of lines,
# line $n-1 will become the left part of the joined line
# line $n will become the right part of the joined line
#
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# We want to decide if we should remove the line break
# between the tokens at $iend_1 and $ibeg_2
#
# We will apply a number of ad-hoc tests to see if joining
# here will look ok. The code will just move to the next
# pair if the join doesn't look good. If we get through
# the gauntlet of tests, the lines will be recombined.
#----------------------------------------------------------
#
# beginning and ending tokens of the lines we are working on
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
# The combined line cannot be too long
my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
next if ( $excess > 0 );
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
DEBUG_RECOMBINE > 1 && do {
print {*STDOUT}
"RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
};
# If line $n is the last line, we set some flags and
# do any special checks for it
my $this_line_is_semicolon_terminated;
if ( $n == $nmax ) {
if ( $type_ibeg_2 eq '{' ) {
# join isolated ')' and '{' if requested (git #110)
if ( $rOpts_cuddled_paren_brace
&& $type_iend_1 eq '}'
&& $iend_1 == $ibeg_1
&& $ibeg_2 == $iend_2 )
{
if ( $tokens_to_go[$iend_1] eq ')'
&& $tokens_to_go[$ibeg_2] eq '{' )
{
$n_best = $n;
$ix_best = $ix;
last;
}
}
# otherwise, a terminal '{' should stay where it is
# unless preceded by a fat comma
next if ( $type_iend_1 ne '=>' );
}
$this_line_is_semicolon_terminated =
$rhash->{_has_terminal_semicolon};
}
#----------------------------------------------------------
# Recombine Section 0:
# Examine the special token joining this line pair, if any.
# Put as many tests in this section to avoid duplicate code
# and to make formatting independent of whether breaks are
# to the left or right of an operator.
#----------------------------------------------------------
my $itok = $rjoint->[$n];
if ($itok) {
my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n );
next if ( !$ok_0 );
}
#----------------------------------------------------------
# Recombine Section 1:
# Join welded nested containers immediately
#----------------------------------------------------------
if (
$total_weld_count
&& ( $type_sequence_to_go[$iend_1]
&& defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
|| $type_sequence_to_go[$ibeg_2]
&& defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
)
{
$n_best = $n;
$ix_best = $ix;
last;
}
#----------------------------------------------------------
# Recombine Section 2:
# Examine token at $iend_1 (right end of first line of pair)
#----------------------------------------------------------
my ( $ok_2, $skip_Section_3 ) =
recombine_section_2( $ri_beg, $ri_end, $n,
$this_line_is_semicolon_terminated );
next if ( !$ok_2 );
#----------------------------------------------------------
# Recombine Section 3:
# Examine token at $ibeg_2 (left end of second line of pair)
#----------------------------------------------------------
# Join lines identified above as capable of
# causing an outdented line with leading closing paren.
# Note that we are skipping the rest of this section
# and the rest of the loop to do the join.
if ($skip_Section_3) {
$forced_breakpoint_to_go[$iend_1] = 0;
$n_best = $n;
$ix_best = $ix;
$incomplete_loop = 1;
last;
}
my ( $ok_3, $bs_tweak ) =
recombine_section_3( $ri_beg, $ri_end, $n,
$this_line_is_semicolon_terminated );
next if ( !$ok_3 );
#----------------------------------------------------------
# Recombine Section 4:
# Combine the lines if we arrive here and it is possible
#----------------------------------------------------------
# honor hard breakpoints
next if ( $forced_breakpoint_to_go[$iend_1] );
if (DEVEL_MODE) {
# This fault can only occur if an array index error has been
# introduced by a recent programming change.
my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
if ( $bs_check != $bs ) {
Fault(<<EOM);
bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n
EOM
}
}
# Require a few extra spaces before recombining lines if we
# are at an old breakpoint unless this is a simple list or
# terminal line. The goal is to avoid oscillating between
# two quasi-stable end states. For example this snippet
# caused problems:
## my $this =
## bless {
## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
## },
## $type;
next
if ( $old_breakpoint_to_go[$iend_1]
&& !$this_line_is_semicolon_terminated
&& $n < $nmax
&& $excess + 4 > 0
&& $type_iend_2 ne ',' );
# do not recombine if we would skip in indentation levels
if ( $n < $nmax ) {
my $if_next = $ri_beg->[ $n + 1 ];
next
if (
$levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
&& $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
# but an isolated 'if (' is undesirable
&& !(
$n == 1
&& $iend_1 - $ibeg_1 <= 2
&& $type_ibeg_1 eq 'k'
&& $tokens_to_go[$ibeg_1] eq 'if'
&& $tokens_to_go[$iend_1] ne '('
)
);
}
## OLD: honor no-break's
## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
# remember the pair with the greatest bond strength
if ( !$n_best ) {
# First good joint ...
$n_best = $n;
$ix_best = $ix;
$bs_best = $bs;
$num_bs = 1;
# In optimization mode: stop on the first acceptable joint
# because we already know it has the highest strength
if ( $rhash->{_optimization_on} == 1 ) {
last;
}
}
else {
# Second and later joints ..
$num_bs++;
# save maximum strength; in case of a tie select min $n
if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) {
$n_best = $n;
$ix_best = $ix;
$bs_best = $bs;
}
}
} ## end loop over all line pairs
#---------------------------------------------------
# recombine the pair with the greatest bond strength
#---------------------------------------------------
if ($n_best) {
DEBUG_RECOMBINE > 1
&& print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n";
splice @{$ri_beg}, $n_best, 1;
splice @{$ri_end}, $n_best - 1, 1;
splice @{$rjoint}, $n_best, 1;
splice @{$rpair_list}, $ix_best, 1;
# Update the line indexes in the pair list:
# Old $n values greater than the best $n decrease by 1
# because of the splice we just did.
foreach my $item ( @{$rpair_list} ) {
my $n_old = $item->[0];
if ( $n_old > $n_best ) { $item->[0] -= 1 }
}
# Store the index of this location for starting the next search.
# We must subtract 1 to get an updated index because the splice
# above just removed the best pair.
# BUT CAUTION: if this is the first pair in the pair list, then
# this produces an invalid index. So this index must be tested
# before use in the next pass through the outer loop.
$rhash->{_ix_best_last} = $ix_best - 1;
# Turn on optimization if ...
if (
# it is not already on, and
!$rhash->{_optimization_on}
# we have not taken a shortcut to get here, and
&& !$incomplete_loop
# we have seen a good break on strength, and
&& $num_bs
)
{
# To deactivate optimization for testing purposes, the next
# line can be commented out. This will increase run time.
$rhash->{_optimization_on} = 1;
if (DEBUG_RECOMBINE) {
my $num_compares = $rhash->{_num_compares};
my $pair_count = @ix_list;
print {*STDOUT}
"Entering optimization phase at $num_compares compares, pair count = $pair_count\n";
}
}
}
return;
} ## end sub recombine_inner_loop
sub recombine_section_0 {
my ( $itok, $ri_beg, $ri_end, $n ) = @_;
# Recombine Section 0:
# Examine special candidate joining token $itok
# Given:
# $itok = index of token at a possible join of lines $n-1 and $n
# Return:
# true => ok to combine
# false => do not combine lines
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^ ^
# | |
# ------------$itok is one of these tokens
# Put as many tests in this section to avoid duplicate code
# and to make formatting independent of whether breaks are
# to the left or right of an operator.
my $nmax = @{$ri_end} - 1;
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $ibeg_2 = $ri_beg->[$n];
my $iend_2 = $ri_end->[$n];
if ($itok) {
my $type = $types_to_go[$itok];
if ( $type eq ':' ) {
# do not join at a colon unless it disobeys the
# break request
if ( $itok eq $iend_1 ) {
return unless $want_break_before{$type};
}
else {
return if $want_break_before{$type};
}
} ## end if ':'
# handle math operators + - * /
elsif ( $is_math_op{$type} ) {
# Combine these lines if this line is a single
# number, or if it is a short term with same
# operator as the previous line. For example, in
# the following code we will combine all of the
# short terms $A, $B, $C, $D, $E, $F, together
# instead of leaving them one per line:
# my $time =
# $A * $B * $C * $D * $E * $F *
# ( 2. * $eps * $sigma * $area ) *
# ( 1. / $tcold**3 - 1. / $thot**3 );
# This can be important in math-intensive code.
my $good_combo;
my $itokp = min( $inext_to_go[$itok], $iend_2 );
my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
my $itokm = max( iprev_to_go($itok), $ibeg_1 );
my $itokmm = max( iprev_to_go($itokm), $ibeg_1 );
# check for a number on the right
if ( $types_to_go[$itokp] eq 'n' ) {
# ok if nothing else on right
if ( $itokp == $iend_2 ) {
$good_combo = 1;
}
else {
# look one more token to right..
# okay if math operator or some termination
$good_combo =
( ( $itokpp == $iend_2 )
&& $is_math_op{ $types_to_go[$itokpp] } )
|| $types_to_go[$itokpp] =~ /^[#,;]$/;
}
}
# check for a number on the left
if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
# okay if nothing else to left
if ( $itokm == $ibeg_1 ) {
$good_combo = 1;
}
# otherwise look one more token to left
else {
# okay if math operator, comma, or assignment
$good_combo = ( $itokmm == $ibeg_1 )
&& ( $is_math_op{ $types_to_go[$itokmm] }
|| $types_to_go[$itokmm] =~ /^[,]$/
|| $is_assignment{ $types_to_go[$itokmm] } );
}
}
# look for a single short token either side of the
# operator
if ( !$good_combo ) {
# Slight adjustment factor to make results
# independent of break before or after operator
# in long summed lists. (An operator and a
# space make two spaces).
my $two = ( $itok eq $iend_1 ) ? 2 : 0;
$good_combo =
# numbers or id's on both sides of this joint
$types_to_go[$itokp] =~ /^[in]$/
&& $types_to_go[$itokm] =~ /^[in]$/
# one of the two lines must be short:
&& (
(
# no more than 2 nonblank tokens right
# of joint
$itokpp == $iend_2
# short
&& token_sequence_length( $itokp, $iend_2 ) <
$two + $rOpts_short_concatenation_item_length
)
|| (
# no more than 2 nonblank tokens left of
# joint
$itokmm == $ibeg_1
# short
&& token_sequence_length( $ibeg_1, $itokm ) <
2 - $two + $rOpts_short_concatenation_item_length
)
)
# keep pure terms; don't mix +- with */
&& !(
$is_plus_minus{$type}
&& ( $is_mult_div{ $types_to_go[$itokmm] }
|| $is_mult_div{ $types_to_go[$itokpp] } )
)
&& !(
$is_mult_div{$type}
&& ( $is_plus_minus{ $types_to_go[$itokmm] }
|| $is_plus_minus{ $types_to_go[$itokpp] } )
)
;
}
# it is also good to combine if we can reduce to 2
# lines
if ( !$good_combo ) {
# index on other line where same token would be
# in a long chain.
my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
$good_combo =
$n == 2
&& $n == $nmax
&& $types_to_go[$iother] ne $type;
}
return unless ($good_combo);
} ## end math
elsif ( $is_amp_amp{$type} ) {
##TBD
} ## end &&, ||
elsif ( $is_assignment{$type} ) {
##TBD
}
else {
## ok - not a special type
}
## end assignment
}
# ok to combine lines
return 1;
} ## end sub recombine_section_0
sub recombine_section_2 {
my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
# Recombine Section 2:
# Examine token at $iend_1 (right end of first line of pair)
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# -----Section 2 looks at this token
# Returns:
# (nothing) => do not join lines
# 1, skip_Section_3 => ok to join lines
# $skip_Section_3 is a flag for skipping the next section
my $skip_Section_3 = 0;
my $nmax = @{$ri_end} - 1;
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
my $ibeg_nmax = $ri_beg->[$nmax];
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
# an isolated '}' may join with a ';' terminated segment
if ( $type_iend_1 eq '}' ) {
# Check for cases where combining a semicolon terminated
# statement with a previous isolated closing paren will
# allow the combined line to be outdented. This is
# generally a good move. For example, we can join up
# the last two lines here:
# (
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
# $size, $atime, $mtime, $ctime, $blksize, $blocks
# )
# = stat($file);
#
# to get:
# (
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
# $size, $atime, $mtime, $ctime, $blksize, $blocks
# ) = stat($file);
#
# which makes the parens line up.
#
# Another example, from Joe Matarazzo, probably looks best
# with the 'or' clause appended to the trailing paren:
# $self->some_method(
# PARAM1 => 'foo',
# PARAM2 => 'bar'
# ) or die "Some_method didn't work";
#
# But we do not want to do this for something like the -lp
# option where the paren is not outdentable because the
# trailing clause will be far to the right.
#
# The logic here is synchronized with the logic in sub
# sub get_final_indentation, which actually does
# the outdenting.
#
my $combine_ok = $this_line_is_semicolon_terminated
# only one token on last line
&& $ibeg_1 == $iend_1
# must be structural paren
&& $tokens_to_go[$iend_1] eq ')'
# style must allow outdenting,
&& !$closing_token_indentation{')'}
# but leading colons probably line up with a
# previous colon or question (count could be wrong).
&& $type_ibeg_2 ne ':'
# only one step in depth allowed. this line must not
# begin with a ')' itself.
&& ( $nesting_depth_to_go[$iend_1] ==
$nesting_depth_to_go[$iend_2] + 1 );
# But only combine leading '&&', '||', if no previous && || :
# seen. This count includes these tokens at all levels. The
# idea is that seeing these at any level can make it hard to read
# formatting if we recombine.
if ( $is_amp_amp{$type_ibeg_2} ) {
foreach my $n_t ( reverse( 0 .. $n - 2 ) ) {
my $ibeg_t = $ri_beg->[$n_t];
my $type_t = $types_to_go[$ibeg_t];
if ( $is_amp_amp{$type_t} || $type_t eq ':' ) {
$combine_ok = 0;
last;
}
}
}
$skip_Section_3 ||= $combine_ok;
# YVES patch 2 of 2:
# Allow cuddled eval chains, like this:
# eval {
# #STUFF;
# 1; # return true
# } or do {
# #handle error
# };
# This patch works together with a patch in
# setting adjusted indentation (where the closing eval
# brace is outdented if possible).
# The problem is that an 'eval' block has continuation
# indentation and it looks better to undo it in some
# cases. If we do not use this patch we would get:
# eval {
# #STUFF;
# 1; # return true
# }
# or do {
# #handle error
# };
# The alternative, for uncuddled style, is to create
# a patch in get_final_indentation which undoes
# the indentation of a leading line like 'or do {'.
# This doesn't work well with -icb through
if (
$block_type_to_go[$iend_1]
&& $rOpts_brace_follower_vertical_tightness > 0
&& (
# -bfvt=1, allow cuddled eval chains [default]
(
$tokens_to_go[$iend_2] eq '{'
&& $block_type_to_go[$iend_1] eq 'eval'
&& !ref( $leading_spaces_to_go[$iend_1] )
&& !$rOpts_indent_closing_brace
)
# -bfvt=2, allow most brace followers [part of git #110]
|| ( $rOpts_brace_follower_vertical_tightness > 1
&& $ibeg_1 == $iend_1 )
)
&& (
( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
|| ( $type_ibeg_2 eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_2] } )
|| $is_if_unless{ $tokens_to_go[$ibeg_2] }
)
)
{
$skip_Section_3 ||= 1;
}
return
unless (
$skip_Section_3
# handle '.' and '?' specially below
|| ( $type_ibeg_2 =~ /^[\.\?]$/ )
# fix for c054 (unusual -pbp case)
|| $type_ibeg_2 eq '=='
);
}
elsif ( $type_iend_1 eq '{' ) {
# YVES
# honor breaks at opening brace
# Added to prevent recombining something like this:
# } || eval { package main;
return if ( $forced_breakpoint_to_go[$iend_1] );
}
# do not recombine lines with ending &&, ||,
elsif ( $is_amp_amp{$type_iend_1} ) {
return unless ( $want_break_before{$type_iend_1} );
}
# Identify and recombine a broken ?/: chain
elsif ( $type_iend_1 eq '?' ) {
# Do not recombine different levels
return
if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
# do not recombine unless next line ends in :
return unless $type_iend_2 eq ':';
}
# for lines ending in a comma...
elsif ( $type_iend_1 eq ',' ) {
# Do not recombine at comma which is following the
# input bias.
# NOTE: this could be controlled by a special flag,
# but it seems to work okay.
return if ( $old_breakpoint_to_go[$iend_1] );
# An isolated '},' may join with an identifier + ';'
# This is useful for the class of a 'bless' statement
# (bless.t)
if ( $type_ibeg_1 eq '}'
&& $type_ibeg_2 eq 'i' )
{
return
unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
&& ( $iend_2 == ( $ibeg_2 + 1 ) )
&& $this_line_is_semicolon_terminated );
# override breakpoint
$forced_breakpoint_to_go[$iend_1] = 0;
}
# but otherwise ..
else {
# do not recombine after a comma unless this will
# leave just 1 more line
return if ( $n + 1 < $nmax );
# do not recombine if there is a change in
# indentation depth
return
if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
# do not recombine a "complex expression" after a
# comma. "complex" means no parens.
my $saw_paren;
foreach my $ii ( $ibeg_2 .. $iend_2 ) {
if ( $tokens_to_go[$ii] eq '(' ) {
$saw_paren = 1;
last;
}
}
return if $saw_paren;
}
}
# opening paren..
elsif ( $type_iend_1 eq '(' ) {
# No longer doing this
}
elsif ( $type_iend_1 eq ')' ) {
# No longer doing this
}
# keep a terminal for-semicolon
elsif ( $type_iend_1 eq 'f' ) {
return;
}
# if '=' at end of line ...
elsif ( $is_assignment{$type_iend_1} ) {
# keep break after = if it was in input stream
# this helps prevent 'blinkers'
return
if (
$old_breakpoint_to_go[$iend_1]
# don't strand an isolated '='
&& $iend_1 != $ibeg_1
);
my $is_short_quote =
( $type_ibeg_2 eq 'Q'
&& $ibeg_2 == $iend_2
&& token_sequence_length( $ibeg_2, $ibeg_2 ) <
$rOpts_short_concatenation_item_length );
my $is_ternary = (
$type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
&& $types_to_go[$ibeg_3] eq ':' )
);
# always join an isolated '=', a short quote, or if this
# will put ?/: at start of adjacent lines
if ( $ibeg_1 != $iend_1
&& !$is_short_quote
&& !$is_ternary )
{
my $combine_ok = (
(
# unless we can reduce this to two lines
$nmax < $n + 2
# or three lines, the last with a leading
# semicolon
|| ( $nmax == $n + 2
&& $types_to_go[$ibeg_nmax] eq ';' )
# or the next line ends with a here doc
|| $type_iend_2 eq 'h'
# or the next line ends in an open paren or
# brace and the break hasn't been forced
# [dima.t]
|| (!$forced_breakpoint_to_go[$iend_1]
&& $type_iend_2 eq '{' )
)
# do not recombine if the two lines might align
# well this is a very approximate test for this
&& (
# RT#127633 - the leading tokens are not
# operators
( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
# or they are different
|| ( $ibeg_3 >= 0
&& $type_ibeg_2 ne $types_to_go[$ibeg_3] )
)
);
return if ( !$combine_ok );
if (
# Recombine if we can make two lines
$nmax >= $n + 2
# -lp users often prefer this:
# my $title = function($env, $env, $sysarea,
# "bubba Borrower Entry");
# so we will recombine if -lp is used we have
# ending comma
&& !(
$ibeg_3 > 0
&& ref( $leading_spaces_to_go[$ibeg_3] )
&& $type_iend_2 eq ','
)
)
{
# otherwise, scan the rhs line up to last token for
# complexity. Note that we are not counting the last token
# in case it is an opening paren.
my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
return if ( !$ok );
}
}
if ( $tokens_to_go[$ibeg_2] !~ /^[\{\(\[]$/ ) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
}
# for keywords..
elsif ( $type_iend_1 eq 'k' ) {
# make major control keywords stand out
# (recombine.t)
return
if (
#/^(last|next|redo|return)$/
$is_last_next_redo_return{ $tokens_to_go[$iend_1] }
# but only if followed by multiple lines
&& $n < $nmax
);
if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
return
unless $want_break_before{ $tokens_to_go[$iend_1] };
}
}
elsif ( $type_iend_1 eq '.' ) {
# NOTE: the logic here should match that of section 3 so that
# line breaks are independent of choice of break before or after.
# It would be nice to combine them in section 0, but the
# special junction case ') .' makes that difficult.
# This section added to fix issues c172, c174.
my $i_next_nonblank = $ibeg_2;
my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
$summed_lengths_to_go[$ibeg_1];
my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
$summed_lengths_to_go[$ibeg_2];
my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) );
my $combine_ok = (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
#
#
# $bodyA .=
# '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
#
# looks better than this:
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' .
# '$args .= $pat;'
# check for 2 lines, not in a long broken '.' chain
( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 )
# ... or this would strand a short quote , like this
# "some long quote" .
# "\n";
|| (
$types_to_go[$i_next_nonblank] eq 'Q'
&& $i_next_nonblank >= $iend_2 - 2
&& $token_lengths_to_go[$i_next_nonblank] <
$rOpts_short_concatenation_item_length
# additional constraints to fix c167
&& ( $types_to_go[$iend_1_minus] ne 'Q'
|| $summed_len_2 < $summed_len_1 )
)
);
return if ( !$combine_ok );
}
else {
## ok - not a special type
}
return ( 1, $skip_Section_3 );
} ## end sub recombine_section_2
sub simple_rhs {
my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
# Scan line ibeg_2 to $iend_2 up to last token for complexity.
# We are not counting the last token in case it is an opening paren.
# Return:
# true if rhs is simple, ok to recombine
# false otherwise
my $tv = 0;
my $depth = $nesting_depth_to_go[$ibeg_2];
foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
# ok to recombine if no level changes before
# last token
if ( $tv > 0 ) {
# otherwise, do not recombine if more than
# two level changes.
return if ( $tv > 1 );
# check total complexity of the two
# adjacent lines that will occur if we do
# this join
my $istop =
( $n < $nmax )
? $ri_end->[ $n + 1 ]
: $iend_2;
foreach my $i ( $iend_2 .. $istop ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 2 );
}
$depth = $nesting_depth_to_go[$i];
}
# do not recombine if total is more than 2
# level changes
return if ( $tv > 2 );
}
return 1;
} ## end sub simple_rhs
sub recombine_section_3 {
my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
# Recombine Section 3:
# Examine token at $ibeg_2 (right end of first line of pair)
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# -----Section 3 looks at this token
# Returns:
# (nothing) => do not join lines
# 1, bs_tweak => ok to join lines
# $bstweak is a small tolerance to add to bond strengths
my $bs_tweak = 0;
my $nmax = @{$ri_end} - 1;
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
my $ibeg_nmax = $ri_beg->[$nmax];
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
# handle lines with leading &&, ||
if ( $is_amp_amp{$type_ibeg_2} ) {
# ok to recombine if it follows a ? or :
# and is followed by an open paren..
my $ok =
( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
# or is followed by a ? or : at same depth
#
# We are looking for something like this. We can
# recombine the && line with the line above to make the
# structure more clear:
# return
# exists $G->{Attr}->{V}
# && exists $G->{Attr}->{V}->{$u}
# ? %{ $G->{Attr}->{V}->{$u} }
# : ();
#
# We should probably leave something like this alone:
# return
# exists $G->{Attr}->{E}
# && exists $G->{Attr}->{E}->{$u}
# && exists $G->{Attr}->{E}->{$u}->{$v}
# ? %{ $G->{Attr}->{E}->{$u}->{$v} }
# : ();
# so that we either have all of the &&'s (or ||'s)
# on one line, as in the first example, or break at
# each one as in the second example. However, it
# sometimes makes things worse to check for this because
# it prevents multiple recombinations. So this is not done.
|| ( $ibeg_3 >= 0
&& $is_ternary{ $types_to_go[$ibeg_3] }
&& $nesting_depth_to_go[$ibeg_3] ==
$nesting_depth_to_go[$ibeg_2] );
# Combine a trailing && term with an || term: fix for
# c060 This is rare but can happen.
$ok ||= 1
if ( $ibeg_3 < 0
&& $type_ibeg_2 eq '&&'
&& $type_ibeg_1 eq '||'
&& $nesting_depth_to_go[$ibeg_2] ==
$nesting_depth_to_go[$ibeg_1] );
return if !$ok && $want_break_before{$type_ibeg_2};
$forced_breakpoint_to_go[$iend_1] = 0;
# tweak the bond strength to give this joint priority
# over ? and :
$bs_tweak = 0.25;
}
# Identify and recombine a broken ?/: chain
elsif ( $type_ibeg_2 eq '?' ) {
# Do not recombine different levels
my $lev = $levels_to_go[$ibeg_2];
return if ( $lev ne $levels_to_go[$ibeg_1] );
# Do not recombine a '?' if either next line or
# previous line does not start with a ':'. The reasons
# are that (1) no alignment of the ? will be possible
# and (2) the expression is somewhat complex, so the
# '?' is harder to see in the interior of the line.
my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
return unless ( $follows_colon || $precedes_colon );
# we will always combining a ? line following a : line
if ( !$follows_colon ) {
# ...otherwise recombine only if it looks like a
# chain. we will just look at a few nearby lines
# to see if this looks like a chain.
my $local_count = 0;
foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
$local_count++
if $ii >= 0
&& $types_to_go[$ii] eq ':'
&& $levels_to_go[$ii] == $lev;
}
return if ( $local_count <= 1 );
}
$forced_breakpoint_to_go[$iend_1] = 0;
}
# do not recombine lines with leading '.'
elsif ( $type_ibeg_2 eq '.' ) {
my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
$summed_lengths_to_go[$ibeg_1];
my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
$summed_lengths_to_go[$ibeg_2];
my $combine_ok = (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
#
#
# $bodyA .=
# '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
#
# looks better than this:
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
# ... or this would strand a short quote , like this
# . "some long quote"
# . "\n";
|| (
$types_to_go[$i_next_nonblank] eq 'Q'
&& $i_next_nonblank >= $iend_2 - 1
&& $token_lengths_to_go[$i_next_nonblank] <
$rOpts_short_concatenation_item_length
# additional constraints to fix c167
&& (
$types_to_go[$iend_1] ne 'Q'
# allow a term shorter than the previous term
|| $summed_len_2 < $summed_len_1
# or allow a short semicolon-terminated term if this
# makes two lines (see c169)
|| ( $n == 2
&& $n == $nmax
&& $this_line_is_semicolon_terminated )
)
)
);
return if ( !$combine_ok );
}
# handle leading keyword..
elsif ( $type_ibeg_2 eq 'k' ) {
# handle leading "or"
if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
my $combine_ok = (
$this_line_is_semicolon_terminated
&& (
$type_ibeg_1 eq '}'
|| (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
# important: only combine a very simple
# or statement because the step below
# may have combined a trailing 'and'
# with this or, and we do not want to
# then combine everything together
&& ( $iend_2 - $ibeg_2 <= 7 )
)
)
);
return if ( !$combine_ok );
#X: RT #81854
$forced_breakpoint_to_go[$iend_1] = 0
if ( !$old_breakpoint_to_go[$iend_1] );
}
# handle leading 'and' and 'xor'
elsif ($tokens_to_go[$ibeg_2] eq 'and'
|| $tokens_to_go[$ibeg_2] eq 'xor' )
{
# Decide if we will combine a single terminal 'and'
# after an 'if' or 'unless'.
# This looks best with the 'and' on the same
# line as the 'if':
#
# $a = 1
# if $seconds and $nu < 2;
#
# But this looks better as shown:
#
# $a = 1
# if !$this->{Parents}{$_}
# or $this->{Parents}{$_} eq $_;
#
return
unless (
$this_line_is_semicolon_terminated
&& (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
&& ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
|| $tokens_to_go[$ibeg_1] eq 'or' )
)
);
}
# handle leading "if" and "unless"
elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
# Combine something like:
# next
# if ( $lang !~ /${l}$/i );
# into:
# next if ( $lang !~ /${l}$/i );
return
unless (
$this_line_is_semicolon_terminated
# previous line begins with 'and' or 'or'
&& $type_ibeg_1 eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_1] }
);
}
# handle all other leading keywords
else {
# keywords look best at start of lines,
# but combine things like "1 while"
if ( !$is_assignment{$type_iend_1} ) {
return
if ( ( $type_iend_1 ne 'k' )
&& ( $tokens_to_go[$ibeg_2] ne 'while' ) );
}
}
}
# similar treatment of && and || as above for 'and' and
# 'or': NOTE: This block of code is currently bypassed
# because of a previous block but is retained for possible
# future use.
elsif ( $is_amp_amp{$type_ibeg_2} ) {
# maybe looking at something like:
# unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
return
unless (
$this_line_is_semicolon_terminated
# previous line begins with an 'if' or 'unless'
# keyword
&& $type_ibeg_1 eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
);
}
# handle line with leading = or similar
elsif ( $is_assignment{$type_ibeg_2} ) {
return unless ( $n == 1 || $n == $nmax );
return if ( $old_breakpoint_to_go[$iend_1] );
return
unless (
# unless we can reduce this to two lines
$nmax == 2
# or three lines, the last with a leading semicolon
|| ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
# or the next line ends with a here doc
|| $type_iend_2 eq 'h'
# or this is a short line ending in ;
|| ( $n == $nmax
&& $this_line_is_semicolon_terminated )
);
$forced_breakpoint_to_go[$iend_1] = 0;
}
else {
## ok - not a special type
}
return ( 1, $bs_tweak );
} ## end sub recombine_section_3
} ## end closure recombine_breakpoints
sub insert_final_ternary_breaks {
my ( $self, $ri_left, $ri_right ) = @_;
# Called once per batch to look for and do any final line breaks for
# long ternary chains
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $i_first_colon = -1;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
return if ( $typel eq '?' );
return if ( $typer eq '?' );
if ( $typel eq ':' ) { $i_first_colon = $il; last; }
if ( $typer eq ':' ) { $i_first_colon = $ir; last; }
}
# For long ternary chains,
# if the first : we see has its ? is in the interior
# of a preceding line, then see if there are any good
# breakpoints before the ?.
if ( $i_first_colon > 0 ) {
my $i_question = $mate_index_to_go[$i_first_colon];
if ( defined($i_question) && $i_question > 0 ) {
my @insert_list;
foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
my $token = $tokens_to_go[$ii];
my $type = $types_to_go[$ii];
# For now, a good break is either a comma or,
# in a long chain, a 'return'.
# Patch for RT #126633: added the $nmax>1 check to avoid
# breaking after a return for a simple ternary. For longer
# chains the break after return allows vertical alignment, so
# it is still done. So perltidy -wba='?' will not break
# immediately after the return in the following statement:
# sub x {
# return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
# 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
# }
if (
(
$type eq ','
|| $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
)
&& $self->in_same_container_i( $ii, $i_question )
)
{
push @insert_list, $ii;
last;
}
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left,
$ri_right );
}
}
}
return;
} ## end sub insert_final_ternary_breaks
sub insert_breaks_before_list_opening_containers {
my ( $self, $ri_left, $ri_right ) = @_;
# This routine is called once per batch to implement the parameters
# --break-before-hash-brace, etc.
# Nothing to do if none of these parameters has been set
return unless %break_before_container_types;
my $nmax = @{$ri_right} - 1;
return if ( $nmax < 0 );
my $rLL = $self->[_rLL_];
my $rbreak_before_container_by_seqno =
$self->[_rbreak_before_container_by_seqno_];
my $rK_weld_left = $self->[_rK_weld_left_];
# scan the ends of all lines
my @insert_list;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
next if ( $ir <= $il );
my $Kl = $K_to_go[$il];
my $Kr = $K_to_go[$ir];
my $Kend = $Kr;
my $type_end = $rLL->[$Kr]->[_TYPE_];
# Backup before any side comment
if ( $type_end eq '#' ) {
$Kend = $self->K_previous_nonblank($Kr);
next unless defined($Kend);
$type_end = $rLL->[$Kend]->[_TYPE_];
}
# Backup to the start of any weld; fix for b1173.
if ($total_weld_count) {
my $Kend_test = $rK_weld_left->{$Kend};
if ( defined($Kend_test) && $Kend_test > $Kl ) {
$Kend = $Kend_test;
$Kend_test = $rK_weld_left->{$Kend};
}
# Do not break if we did not back up to the start of a weld
# (shouldn't happen)
next if ( defined($Kend_test) );
}
my $token = $rLL->[$Kend]->[_TOKEN_];
next if ( !$is_opening_token{$token} );
next if ( $Kl >= $Kend - 1 );
my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
next if ( !defined($seqno) );
# Use the flag which was previously set
next unless ( $rbreak_before_container_by_seqno->{$seqno} );
# Install a break before this opening token.
my $Kbreak = $self->K_previous_nonblank($Kend);
my $ibreak = $Kbreak - $Kl + $il;
next if ( $ibreak < $il );
next if ( $nobreak_to_go[$ibreak] );
push @insert_list, $ibreak;
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub insert_breaks_before_list_opening_containers
sub note_added_semicolon {
my ( $self, $line_number ) = @_;
$self->[_last_added_semicolon_at_] = $line_number;
if ( $self->[_added_semicolon_count_] == 0 ) {
$self->[_first_added_semicolon_at_] = $line_number;
}
$self->[_added_semicolon_count_]++;
write_logfile_entry("Added ';' here\n");
return;
} ## end sub note_added_semicolon
sub note_deleted_semicolon {
my ( $self, $line_number ) = @_;
$self->[_last_deleted_semicolon_at_] = $line_number;
if ( $self->[_deleted_semicolon_count_] == 0 ) {
$self->[_first_deleted_semicolon_at_] = $line_number;
}
$self->[_deleted_semicolon_count_]++;
write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
return;
} ## end sub note_deleted_semicolon
sub note_embedded_tab {
my ( $self, $line_number ) = @_;
$self->[_embedded_tab_count_]++;
$self->[_last_embedded_tab_at_] = $line_number;
if ( !$self->[_first_embedded_tab_at_] ) {
$self->[_first_embedded_tab_at_] = $line_number;
}
if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry("Embedded tabs in quote or pattern\n");
}
return;
} ## end sub note_embedded_tab
use constant DEBUG_CORRECT_LP => 0;
sub correct_lp_indentation {
# When the -lp option is used, we need to make a last pass through
# each line to correct the indentation positions in case they differ
# from the predictions. This is necessary because perltidy uses a
# predictor/corrector method for aligning with opening parens. The
# predictor is usually good, but sometimes stumbles. The corrector
# tries to patch things up once the actual opening paren locations
# are known.
my ( $self, $ri_first, $ri_last ) = @_;
# first remove continuation indentation if appropriate
my $max_line = @{$ri_first} - 1;
#---------------------------------------------------------------------------
# PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
#---------------------------------------------------------------------------
# The point is that sub 'starting_one_line_block' made one-line blocks based
# on default indentation, not -lp indentation. So some of the one-line
# blocks may be too long when given -lp indentation. We will fix that now
# if possible, using the list of these closing block indexes.
my $ri_starting_one_line_block =
$self->[_this_batch_]->[_ri_starting_one_line_block_];
if ( @{$ri_starting_one_line_block} ) {
$self->correct_lp_indentation_pass_1( $ri_first, $ri_last,
$ri_starting_one_line_block );
}
#-------------------------------------------------------------------
# PASS 2: look for and fix other problems in each line of this batch
#-------------------------------------------------------------------
# look at each output line ...
foreach my $line ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
# looking at each token in this output line ...
foreach my $i ( $ibeg .. $iend ) {
# How many space characters to place before this token
# for special alignment. Actual padding is done in the
# continue block.
# looking for next unvisited indentation item ...
my $indentation = $leading_spaces_to_go[$i];
# This is just for indentation objects (c098)
next unless ( ref($indentation) );
# Visit each indentation object just once
next if ( $indentation->get_marked() );
# Mark first visit
$indentation->set_marked(1);
# Skip indentation objects which do not align with container tokens
my $align_seqno = $indentation->get_align_seqno();
next unless ($align_seqno);
# Skip a container which is entirely on this line
my $Ko = $self->[_K_opening_container_]->{$align_seqno};
my $Kc = $self->[_K_closing_container_]->{$align_seqno};
if ( defined($Ko) && defined($Kc) ) {
next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
}
# Note on flag '$do_not_pad':
# We want to avoid a situation like this, where the aligner
# inserts whitespace before the '=' to align it with a previous
# '=', because otherwise the parens might become mis-aligned in a
# situation like this, where the '=' has become aligned with the
# previous line, pushing the opening '(' forward beyond where we
# want it.
#
# $mkFloor::currentRoom = '';
# $mkFloor::c_entry = $c->Entry(
# -width => '10',
# -relief => 'sunken',
# ...
# );
#
# We leave it to the aligner to decide how to do this.
if ( $line == 1 && $i == $ibeg ) {
$self->[_this_batch_]->[_do_not_pad_] = 1;
}
#--------------------------------------------
# Now see what the error is and try to fix it
#--------------------------------------------
my $closing_index = $indentation->get_closed();
my $predicted_pos = $indentation->get_spaces();
# Find actual position:
my $actual_pos;
if ( $i == $ibeg ) {
# Case 1: token is first character of of batch - table lookup
if ( $line == 0 ) {
$actual_pos = $predicted_pos;
my ( $indent, $offset, $is_leading, $exists ) =
get_saved_opening_indentation($align_seqno);
if ( defined($indent) ) {
# NOTE: we could use '1' here if no space after
# opening and '2' if want space; it is hardwired at 1
# like -gnu-style. But it is probably best to leave
# this alone because changing it would change
# formatting of much existing code without any
# significant benefit.
$actual_pos = get_spaces($indent) + $offset + 1;
}
}
# Case 2: token starts a new line - use length of previous line
else {
my $ibegm = $ri_first->[ $line - 1 ];
my $iendm = $ri_last->[ $line - 1 ];
$actual_pos = total_line_length( $ibegm, $iendm );
# follow -pt style
++$actual_pos
if ( $types_to_go[ $iendm + 1 ] eq 'b' );
}
}
# Case 3: $i>$ibeg: token is mid-line - use length to previous token
else {
$actual_pos = total_line_length( $ibeg, $i - 1 );
# for mid-line token, we must check to see if all
# additional lines have continuation indentation,
# and remove it if so. Otherwise, we do not get
# good alignment.
if ( $closing_index > $iend ) {
my $ibeg_next = $ri_first->[ $line + 1 ];
if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
$self->undo_lp_ci( $line, $i, $closing_index,
$ri_first, $ri_last );
}
}
}
# By how many spaces (plus or minus) would we need to increase the
# indentation to get alignment with the opening token?
my $move_right = $actual_pos - $predicted_pos;
if (DEBUG_CORRECT_LP) {
my $tok = substr( $tokens_to_go[$i], 0, 8 );
my $avail = $self->get_available_spaces_to_go($ibeg);
print
"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
}
# nothing more to do if no error to correct (gnu2.t)
if ( $move_right == 0 ) {
$indentation->set_recoverable_spaces($move_right);
next;
}
# Get any collapsed length defined for -xlp
my $collapsed_length =
$self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
$collapsed_length = 0 unless ( defined($collapsed_length) );
if (DEBUG_CORRECT_LP) {
print
"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
}
# if we have not seen closure for this indentation in this batch,
# and do not have a collapsed length estimate, we can only pass on
# a request to the vertical aligner
if ( $closing_index < 0 && !$collapsed_length ) {
$indentation->set_recoverable_spaces($move_right);
next;
}
# If necessary, look ahead to see if there is really any leading
# whitespace dependent on this whitespace, and also find the
# longest line using this whitespace. Since it is always safe to
# move left if there are no dependents, we only need to do this if
# we may have dependent nodes or need to move right.
my $have_child = $indentation->get_have_child();
my %saw_indentation;
my $line_count = 1;
$saw_indentation{$indentation} = $indentation;
# How far can we move right before we hit the limit?
# let $right_margen = the number of spaces that we can increase
# the current indentation before hitting the maximum line length.
my $right_margin = 0;
if ( $have_child || $move_right > 0 ) {
$have_child = 0;
# include estimated collapsed length for incomplete containers
my $max_length = 0;
if ( $Kc > $K_to_go[$max_index_to_go] ) {
$max_length = $collapsed_length + $predicted_pos;
}
if ( $i == $ibeg ) {
my $length = total_line_length( $ibeg, $iend );
if ( $length > $max_length ) { $max_length = $length }
}
# look ahead at the rest of the lines of this batch..
foreach my $line_t ( $line + 1 .. $max_line ) {
my $ibeg_t = $ri_first->[$line_t];
my $iend_t = $ri_last->[$line_t];
last if ( $closing_index <= $ibeg_t );
# remember all different indentation objects
my $indentation_t = $leading_spaces_to_go[$ibeg_t];
$saw_indentation{$indentation_t} = $indentation_t;
$line_count++;
# remember longest line in the group
my $length_t = total_line_length( $ibeg_t, $iend_t );
if ( $length_t > $max_length ) {
$max_length = $length_t;
}
}
$right_margin =
$maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
$max_length;
if ( $right_margin < 0 ) { $right_margin = 0 }
}
my $first_line_comma_count =
grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
my $comma_count = $indentation->get_comma_count();
my $arrow_count = $indentation->get_arrow_count();
# This is a simple approximate test for vertical alignment:
# if we broke just after an opening paren, brace, bracket,
# and there are 2 or more commas in the first line,
# and there are no '=>'s,
# then we are probably vertically aligned. We could set
# an exact flag in sub break_lists, but this is good
# enough.
my $indentation_count = keys %saw_indentation;
my $is_vertically_aligned =
( $i == $ibeg
&& $first_line_comma_count > 1
&& $indentation_count == 1
&& ( $arrow_count == 0 || $arrow_count == $line_count ) );
# Make the move if possible ..
if (
# we can always move left
$move_right < 0
# -xlp
# incomplete container
|| ( $rOpts_extended_line_up_parentheses
&& $Kc > $K_to_go[$max_index_to_go] )
|| $closing_index < 0
# but we should only move right if we are sure it will
# not spoil vertical alignment
|| ( $comma_count == 0 )
|| ( $comma_count > 0 && !$is_vertically_aligned )
)
{
my $move =
( $move_right <= $right_margin )
? $move_right
: $right_margin;
if (DEBUG_CORRECT_LP) {
print
"CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
}
foreach ( keys %saw_indentation ) {
$saw_indentation{$_}
->permanently_decrease_available_spaces( -$move );
}
}
# Otherwise, record what we want and the vertical aligner
# will try to recover it.
else {
$indentation->set_recoverable_spaces($move_right);
}
} ## end loop over tokens in a line
} ## end loop over lines
return;
} ## end sub correct_lp_indentation
sub correct_lp_indentation_pass_1 {
my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_;
# So some of the one-line blocks may be too long when given -lp
# indentation. We will fix that now if possible, using the list of these
# closing block indexes.
my @ilist = @{$ri_starting_one_line_block};
return unless (@ilist);
my $max_line = @{$ri_first} - 1;
my $inext = shift(@ilist);
# loop over lines, checking length of each with a one-line block
my ( $ibeg, $iend );
foreach my $line ( 0 .. $max_line ) {
$iend = $ri_last->[$line];
next if ( $inext > $iend );
$ibeg = $ri_first->[$line];
# This is just for lines with indentation objects (c098)
my $excess =
ref( $leading_spaces_to_go[$ibeg] )
? $self->excess_line_length( $ibeg, $iend )
: 0;
if ( $excess > 0 ) {
my $available_spaces = $self->get_available_spaces_to_go($ibeg);
if ( $available_spaces > 0 ) {
my $delete_want = min( $available_spaces, $excess );
my $deleted_spaces =
$self->reduce_lp_indentation( $ibeg, $delete_want );
$available_spaces = $self->get_available_spaces_to_go($ibeg);
}
}
# skip forward to next one-line block to check
while (@ilist) {
$inext = shift @ilist;
next if ( $inext <= $iend );
last if ( $inext > $iend );
}
last if ( $inext <= $iend );
}
return;
} ## end sub correct_lp_indentation_pass_1
sub undo_lp_ci {
# If there is a single, long parameter within parens, like this:
#
# $self->command( "/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?" );
#
# we can remove the continuation indentation of the 2nd and higher lines
# to achieve this effect, which is more pleasing:
#
# $self->command("/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?");
my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
@_;
my $max_line = @{$ri_first} - 1;
# must be multiple lines
return if ( $max_line <= $line_open );
my $lev_start = $levels_to_go[$i_start];
my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
# see if all additional lines in this container have continuation
# indentation
my $line_1 = 1 + $line_open;
my $n = $line_open;
while ( ++$n <= $max_line ) {
my $ibeg = $ri_first->[$n];
my $iend = $ri_last->[$n];
if ( $ibeg eq $closing_index ) { $n--; last }
return if ( $lev_start != $levels_to_go[$ibeg] );
return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
last if ( $closing_index <= $iend );
}
# we can reduce the indentation of all continuation lines
my $continuation_line_count = $n - $line_open;
@ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
(0) x ($continuation_line_count);
@leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
@reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
return;
} ## end sub undo_lp_ci
################################################
# CODE SECTION 10: Code to break long statements
################################################
use constant DEBUG_BREAK_LINES => 0;
sub break_long_lines {
#-----------------------------------------------------------
# Break a batch of tokens into lines which do not exceed the
# maximum line length.
#-----------------------------------------------------------
my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
# Input parameters:
# $saw_good_break - a flag set by break_lists
# $rcolon_list - ref to a list of all the ? and : tokens in the batch,
# in order.
# $rbond_strength_bias - small bond strength bias values set by break_lists
# Output: returns references to the arrays:
# @i_first
# @i_last
# which contain the indexes $i of the first and last tokens on each
# line.
# In addition, the array:
# $forced_breakpoint_to_go[$i]
# may be updated to be =1 for any index $i after which there must be
# a break. This signals later routines not to undo the breakpoint.
# Method:
# This routine is called if a statement is longer than the maximum line
# length, or if a preliminary scanning located desirable break points.
# Sub break_lists has already looked at these tokens and set breakpoints
# (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
# example after commas, after opening parens, and before closing parens).
# This routine will honor these breakpoints and also add additional
# breakpoints as necessary to keep the line length below the maximum
# requested. It bases its decision on where the 'bond strength' is
# lowest.
my @i_first = (); # the first index to output
my @i_last = (); # the last index to output
my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
# Get the 'bond strengths' between tokens
my $rbond_strength_to_go = $self->set_bond_strengths();
# Add any comma bias set by break_lists
if ( @{$rbond_strength_bias} ) {
foreach my $item ( @{$rbond_strength_bias} ) {
my ( $ii, $bias ) = @{$item};
if ( $ii >= 0 && $ii <= $max_index_to_go ) {
$rbond_strength_to_go->[$ii] += $bias;
}
else {
if (DEVEL_MODE) {
my $KK = $K_to_go[0];
my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
Fault(
"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
);
}
}
}
}
my $imin = 0;
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
my $i_begin = $imin;
my $last_break_strength = NO_BREAK;
my $i_last_break = -1;
my $line_count = 0;
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = EMPTY_STRING;
foreach ( @{$rcolon_list} ) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
}
# This is a sufficient but not necessary condition for colon chain
my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
#------------------------------------------
# BEGINNING of main loop to set breakpoints
# Keep iterating until we reach the end
#------------------------------------------
while ( $i_begin <= $imax ) {
#------------------------------------------------------------------
# Find the best next breakpoint based on token-token bond strengths
#------------------------------------------------------------------
my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) =
$self->break_lines_inner_loop(
$i_begin,
$i_last_break,
$imax,
$last_break_strength,
$line_count,
$rbond_strength_to_go,
$saw_good_break,
);
# Now make any adjustments required by ternary breakpoint rules
if ( @{$rcolon_list} ) {
my $i_next_nonblank = $inext_to_go[$i_lowest];
#-------------------------------------------------------
# ?/: rule 1 : if a break here will separate a '?' on this
# line from its closing ':', then break at the '?' instead.
# But do not break a sequential chain of ?/: statements
#-------------------------------------------------------
if ( !$is_colon_chain ) {
foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
next unless ( $tokens_to_go[$i] eq '?' );
# do not break if statement is broken by side comment
next
if ( $tokens_to_go[$max_index_to_go] eq '#'
&& terminal_type_i( 0, $max_index_to_go ) !~
/^[\;\}]$/ );
# no break needed if matching : is also on the line
next
if ( defined( $mate_index_to_go[$i] )
&& $mate_index_to_go[$i] <= $i_next_nonblank );
$i_lowest = $i;
if ( $want_break_before{'?'} ) { $i_lowest-- }
$i_next_nonblank = $inext_to_go[$i_lowest];
last;
}
}
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
#-------------------------------------------------------------
# ?/: rule 2 : if we break at a '?', then break at its ':'
#
# Note: this rule is also in sub break_lists to handle a break
# at the start and end of a line (in case breaks are dictated
# by side comments).
#-------------------------------------------------------------
if ( $next_nonblank_type eq '?' ) {
$self->set_closing_breakpoint($i_next_nonblank);
}
elsif ( $types_to_go[$i_lowest] eq '?' ) {
$self->set_closing_breakpoint($i_lowest);
}
else {
## ok
}
#--------------------------------------------------------
# ?/: rule 3 : if we break at a ':' then we save
# its location for further work below. We may need to go
# back and break at its '?'.
#--------------------------------------------------------
if ( $next_nonblank_type eq ':' ) {
push @i_colon_breaks, $i_next_nonblank;
}
elsif ( $types_to_go[$i_lowest] eq ':' ) {
push @i_colon_breaks, $i_lowest;
}
else {
## ok
}
# here we should set breaks for all '?'/':' pairs which are
# separated by this line
}
# guard against infinite loop (should never happen)
if ( $i_lowest <= $i_last_break ) {
DEVEL_MODE
&& Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
$i_lowest = $imax;
}
DEBUG_BREAK_LINES
&& print {*STDOUT}
"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
$line_count++;
# save this line segment, after trimming blanks at the ends
push( @i_first,
( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
push( @i_last,
( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
# set a forced breakpoint at a container opening, if necessary, to
# signal a break at a closing container. Excepting '(' for now.
if (
(
$tokens_to_go[$i_lowest] eq '{'
|| $tokens_to_go[$i_lowest] eq '['
)
&& !$forced_breakpoint_to_go[$i_lowest]
)
{
$self->set_closing_breakpoint($i_lowest);
}
# get ready to find the next breakpoint
$last_break_strength = $lowest_strength;
$i_last_break = $i_lowest;
$i_begin = $i_lowest + 1;
# skip past a blank
if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
$i_begin++;
}
}
#-------------------------------------------------
# END of main loop to set continuation breakpoints
#-------------------------------------------------
#-----------------------------------------------------------
# ?/: rule 4 -- if we broke at a ':', then break at
# corresponding '?' unless this is a chain of ?: expressions
#-----------------------------------------------------------
if (@i_colon_breaks) {
my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
if ( !$is_chain ) {
$self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
}
}
return ( \@i_first, \@i_last, $rbond_strength_to_go );
} ## end sub break_long_lines
# small bond strength numbers to help break ties
use constant TINY_BIAS => 0.0001;
use constant MAX_BIAS => 0.001;
sub break_lines_inner_loop {
#-----------------------------------------------------------------
# Find the best next breakpoint in index range ($i_begin .. $imax)
# which, if possible, does not exceed the maximum line length.
#-----------------------------------------------------------------
my (
$self, #
$i_begin,
$i_last_break,
$imax,
$last_break_strength,
$line_count,
$rbond_strength_to_go,
$saw_good_break,
) = @_;
# Given:
# $i_begin = first index of range
# $i_last_break = index of previous break
# $imax = last index of range
# $last_break_strength = bond strength of last break
# $line_count = number of output lines so far
# $rbond_strength_to_go = ref to array of bond strengths
# $saw_good_break = true if old line had a good breakpoint
# Returns:
# $i_lowest = index of best breakpoint
# $lowest_strength = 'bond strength' at best breakpoint
# $leading_alignment_type = special token type after break
# $Msg = string of debug info
my $Msg = EMPTY_STRING;
my $strength = NO_BREAK;
my $i_test = $i_begin - 1;
my $i_lowest = -1;
my $starting_sum = $summed_lengths_to_go[$i_begin];
my $lowest_strength = NO_BREAK;
my $leading_alignment_type = EMPTY_STRING;
my $leading_spaces = leading_spaces_to_go($i_begin);
my $maximum_line_length =
$maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
DEBUG_BREAK_LINES
&& do {
$Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
};
# Do not separate an isolated bare word from an opening paren.
# Alternate Fix #2 for issue b1299. This waits as long as possible
# to make the decision.
# Note for fix #c250: to keep line breaks unchanged under -extrude when
# switching from 'i' to 'S' for subs, we would have to also check 'S', i.e.
# =~/^[Si]$/. But this was never necessary at a sub signature, so we leave
# it alone and allow the new version to be different for --extrude. For a
# test file run perl527/signatures.t with --extrude.
if ( $types_to_go[$i_begin] eq 'i'
&& substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
{
my $i_next_nonblank = $inext_to_go[$i_begin];
if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
$rbond_strength_to_go->[$i_begin] = NO_BREAK;
}
}
# Avoid a break which would strand a single punctuation
# token. For example, we do not want to strand a leading
# '.' which is followed by a long quoted string.
# But note that we do want to do this with -extrude (l=1)
# so please test any changes to this code on -extrude.
if (
( $i_begin < $imax )
&& ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] )
&& !$forced_breakpoint_to_go[$i_begin]
&& !(
# Allow break after a closing eval brace. This is an
# approximate way to simulate a forced breakpoint made in
# Section B below. No differences have been found, but if
# necessary the full logic of Section B could be used here
# (see c165).
$tokens_to_go[$i_begin] eq '}'
&& $block_type_to_go[$i_begin]
&& $block_type_to_go[$i_begin] eq 'eval'
)
&& (
(
$leading_spaces +
$summed_lengths_to_go[ $i_begin + 1 ] -
$starting_sum
) < $maximum_line_length
)
)
{
$i_test = min( $imax, $inext_to_go[$i_begin] ) - 1;
DEBUG_BREAK_LINES && do {
$Msg .= " :skip ahead at i=$i_test";
};
}
#-------------------------------------------------------
# Begin INNER_LOOP over the indexes in the _to_go arrays
#-------------------------------------------------------
while ( ++$i_test <= $imax ) {
my $type = $types_to_go[$i_test];
my $token = $tokens_to_go[$i_test];
my $i_next_nonblank = $inext_to_go[$i_test];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
#---------------------------------------------------------------
# Section A: Get token-token strength and handle any adjustments
#---------------------------------------------------------------
# adjustments to the previous bond strength may have been made, and
# we must keep the bond strength of a token and its following blank
# the same;
my $last_strength = $strength;
$strength = $rbond_strength_to_go->[$i_test];
if ( $type eq 'b' ) { $strength = $last_strength }
# reduce strength a bit to break ties at an old comma breakpoint ...
if (
$old_breakpoint_to_go[$i_test]
# Patch: limited to just commas to avoid blinking states
&& $type eq ','
# which is a 'good' breakpoint, meaning ...
# we don't want to break before it
&& !$want_break_before{$type}
# and either we want to break before the next token
# or the next token is not short (i.e. not a '*', '/' etc.)
&& $i_next_nonblank <= $imax
&& ( $want_break_before{$next_nonblank_type}
|| $token_lengths_to_go[$i_next_nonblank] > 2
|| $next_nonblank_type eq ','
|| $is_opening_type{$next_nonblank_type} )
)
{
$strength -= TINY_BIAS;
DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
}
# otherwise increase strength a bit if this token would be at the
# maximum line length. This is necessary to avoid blinking
# in the above example when the -iob flag is added.
else {
my $len =
$leading_spaces +
$summed_lengths_to_go[ $i_test + 1 ] -
$starting_sum;
if ( $len >= $maximum_line_length ) {
$strength += TINY_BIAS;
DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
}
}
#-------------------------------------
# Section B: Handle forced breakpoints
#-------------------------------------
my $must_break;
# Force an immediate break at certain operators
# with lower level than the start of the line,
# unless we've already seen a better break.
#
# Note on an issue with a preceding '?' :
# There may be a break at a previous ? if the line is long. Because
# of this we do not want to force a break if there is a previous ? on
# this line. For now the best way to do this is to not break if we
# have seen a lower strength point, which is probably a ?.
#
# Example of unwanted breaks we are avoiding at a '.' following a ?
# from pod2html using perltidy -gnu:
# )
# ? "\n<A NAME=\""
# . $value
# . "\">\n$text</A>\n"
# : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
if (
( $strength <= $lowest_strength )
&& ( $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_next_nonblank] )
&& (
$next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
|| (
$next_nonblank_type eq 'k'
## /^(and|or)$/ # note: includes 'xor' now
&& $is_and_or{$next_nonblank_token}
)
)
)
{
$self->set_forced_breakpoint($i_next_nonblank);
DEBUG_BREAK_LINES
&& do { $Msg .= " :Forced break at i=$i_next_nonblank" };
}
if (
# Try to put a break where requested by break_lists
$forced_breakpoint_to_go[$i_test]
# break between ) { in a continued line so that the '{' can
# be outdented
# See similar logic in break_lists which catches instances
# where a line is just something like ') {'. We have to
# be careful because the corresponding block keyword might
# not be on the first line, such as 'for' here:
#
# eval {
# for ("a") {
# for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
# }
# };
#
|| (
$line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
# RT #104427: Dont break before opening sub brace because
# sub block breaks handled at higher level, unless
# it looks like the preceding list is long and broken
&& !(
(
$next_nonblank_block_type =~ /$SUB_PATTERN/
|| $matches_ASUB{$next_nonblank_block_type}
)
&& ( $nesting_depth_to_go[$i_begin] ==
$nesting_depth_to_go[$i_next_nonblank] )
)
&& !$rOpts_opening_brace_always_on_right
)
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
)
{
# Forced breakpoints must sometimes be overridden, for example
# because of a side comment causing a NO_BREAK. It is easier
# to catch this here than when they are set.
if ( $strength < NO_BREAK - 1 ) {
$strength = $lowest_strength - TINY_BIAS;
$must_break = 1;
DEBUG_BREAK_LINES
&& do { $Msg .= " :set must_break at i=$i_next_nonblank" };
}
}
# quit if a break here would put a good terminal token on
# the next line and we already have a possible break
if (
( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
&& !$must_break
&& (
(
$leading_spaces +
$summed_lengths_to_go[ $i_next_nonblank + 1 ] -
$starting_sum
) > $maximum_line_length
)
)
{
if ( $i_lowest >= 0 ) {
DEBUG_BREAK_LINES && do {
$Msg .= " :quit at good terminal='$next_nonblank_type'";
};
last;
}
}
#------------------------------------------------------------
# Section C: Look for the lowest bond strength between tokens
#------------------------------------------------------------
if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
# break at previous best break if it would have produced
# a leading alignment of certain common tokens, and it
# is different from the latest candidate break
if ($leading_alignment_type) {
DEBUG_BREAK_LINES && do {
$Msg .=
" :last at leading_alignment='$leading_alignment_type'";
};
last;
}
# Force at least one breakpoint if old code had good
# break It is only called if a breakpoint is required or
# desired. This will probably need some adjustments
# over time. A goal is to try to be sure that, if a new
# side comment is introduced into formatted text, then
# the same breakpoints will occur. scbreak.t
if (
$i_test == $imax # we are at the end
&& !$forced_breakpoint_count
&& $saw_good_break # old line had good break
&& $type =~ /^[#;\{]$/ # and this line ends in
# ';' or side comment
&& $i_last_break < 0 # and we haven't made a break
&& $i_lowest >= 0 # and we saw a possible break
&& $i_lowest < $imax - 1 # (but not just before this ;)
&& $strength - $lowest_strength < 0.5 * WEAK # and it's good
)
{
DEBUG_BREAK_LINES && do {
$Msg .= " :last at good old break\n";
};
last;
}
# Do not skip past an important break point in a short final
# segment. For example, without this check we would miss the
# break at the final / in the following code:
#
# $depth_stop =
# ( $tau * $mass_pellet * $q_0 *
# ( 1. - exp( -$t_stop / $tau ) ) -
# 4. * $pi * $factor * $k_ice *
# ( $t_melt - $t_ice ) *
# $r_pellet *
# $t_stop ) /
# ( $rho_ice * $Qs * $pi * $r_pellet**2 );
#
if (
$line_count > 2
&& $i_lowest >= 0 # and we saw a possible break
&& $i_lowest < $i_test
&& $i_test > $imax - 2
&& $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_lowest]
&& $lowest_strength < $last_break_strength - .5 * WEAK
)
{
# Make this break for math operators for now
my $ir = $inext_to_go[$i_lowest];
my $il = iprev_to_go($ir);
if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
|| $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
{
DEBUG_BREAK_LINES && do {
$Msg .= " :last-noskip_short";
};
last;
}
}
# Update the minimum bond strength location
$lowest_strength = $strength;
$i_lowest = $i_test;
if ($must_break) {
DEBUG_BREAK_LINES && do {
$Msg .= " :last-must_break";
};
last;
}
# set flags to remember if a break here will produce a
# leading alignment of certain common tokens
if ( $line_count > 0
&& $i_test < $imax
&& ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
{
my $i_last_end = iprev_to_go($i_begin);
my $tok_beg = $tokens_to_go[$i_begin];
my $type_beg = $types_to_go[$i_begin];
if (
# check for leading alignment of certain tokens
(
$tok_beg eq $next_nonblank_token
&& $is_chain_operator{$tok_beg}
&& ( $type_beg eq 'k'
|| $type_beg eq $tok_beg )
&& $nesting_depth_to_go[$i_begin] >=
$nesting_depth_to_go[$i_next_nonblank]
)
|| ( $tokens_to_go[$i_last_end] eq $token
&& $is_chain_operator{$token}
&& ( $type eq 'k' || $type eq $token )
&& $nesting_depth_to_go[$i_last_end] >=
$nesting_depth_to_go[$i_test] )
)
{
$leading_alignment_type = $next_nonblank_type;
}
}
}
#-----------------------------------------------------------
# Section D: See if the maximum line length will be exceeded
#-----------------------------------------------------------
# Quit if there are no more tokens to test
last if ( $i_test >= $imax );
# Keep going if we have not reached the limit
my $excess =
$leading_spaces +
$summed_lengths_to_go[ $i_test + 2 ] -
$starting_sum -
$maximum_line_length;
if ( $excess < 0 ) {
next;
}
elsif ( $excess == 0 ) {
# To prevent blinkers we will avoid leaving a token exactly at
# the line length limit unless it is the last token or one of
# several "good" types.
#
# The following code was a blinker with -pbp before this
# modification:
# $last_nonblank_token eq '('
# && $is_indirect_object_taker{ $paren_type
# [$paren_depth] }
# The issue causing the problem is that if the
# term [$paren_depth] gets broken across a line then
# the whitespace routine doesn't see both opening and closing
# brackets and will format like '[ $paren_depth ]'. This
# leads to an oscillation in length depending if we break
# before the closing bracket or not.
if ( $i_test + 1 < $imax
&& $next_nonblank_type ne ','
&& !$is_closing_type{$next_nonblank_type} )
{
# too long
DEBUG_BREAK_LINES && do {
$Msg .= " :too_long";
}
}
else {
next;
}
}
else {
# too long
}
# a break here makes the line too long ...
DEBUG_BREAK_LINES && do {
my $ltok = $token;
my $rtok =
$next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
my $i_testp2 = $i_test + 2;
if ( $i_testp2 > $max_index_to_go + 1 ) {
$i_testp2 = $max_index_to_go + 1;
}
if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
print {*STDOUT}
"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n";
};
# Exception: allow one extra terminal token after exceeding line length
# if it would strand this token.
if ( $i_lowest == $i_test
&& $token_lengths_to_go[$i_test] > 1
&& ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
&& $rOpts_fuzzy_line_length )
{
DEBUG_BREAK_LINES && do {
$Msg .= " :do_not_strand next='$next_nonblank_type'";
};
next;
}
# Stop if here if we have a solution and the line will be too long
if ( $i_lowest >= 0 ) {
DEBUG_BREAK_LINES && do {
$Msg .=
" :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax";
};
last;
}
}
#-----------------------------------------------------
# End INNER_LOOP over the indexes in the _to_go arrays
#-----------------------------------------------------
# Be sure we return an index in the range ($ibegin .. $imax).
# We will break at imax if no other break was found.
if ( $i_lowest < 0 ) { $i_lowest = $imax }
return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg );
} ## end sub break_lines_inner_loop
sub do_colon_breaks {
my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
# using a simple method for deciding if we are in a ?/: chain --
# this is a chain if it has multiple ?/: pairs all in order;
# otherwise not.
# Note that if line starts in a ':' we count that above as a break
my @insert_list = ();
foreach ( @{$ri_colon_breaks} ) {
my $i_question = $mate_index_to_go[$_];
if ( defined($i_question) ) {
if ( $want_break_before{'?'} ) {
$i_question = iprev_to_go($i_question);
}
if ( $i_question >= 0 ) {
push @insert_list, $i_question;
}
}
$self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
}
return;
} ## end sub do_colon_breaks
###########################################
# CODE SECTION 11: Code to break long lists
###########################################
{ ## begin closure break_lists
# These routines and variables are involved in finding good
# places to break long lists.
use constant DEBUG_BREAK_LISTS => 0;
my (
$block_type,
$current_depth,
$depth,
$i,
$i_last_colon,
$i_line_end,
$i_line_start,
$i_last_nonblank_token,
$last_nonblank_block_type,
$last_nonblank_token,
$last_nonblank_type,
$last_old_breakpoint_count,
$minimum_depth,
$next_nonblank_block_type,
$next_nonblank_token,
$next_nonblank_type,
$old_breakpoint_count,
$starting_breakpoint_count,
$starting_depth,
$token,
$type,
$type_sequence,
);
my (
@breakpoint_stack,
@breakpoint_undo_stack,
@comma_index,
@container_type,
@identifier_count_stack,
@index_before_arrow,
@interrupted_list,
@item_count_stack,
@last_comma_index,
@last_dot_index,
@last_nonblank_type,
@old_breakpoint_count_stack,
@opening_structure_index_stack,
@rfor_semicolon_list,
@has_old_logical_breakpoints,
@rand_or_list,
@i_equals,
@override_cab3,
@type_sequence_stack,
);
# these arrays must retain values between calls
my ( @has_broken_sublist, @dont_align, @want_comma_break );
my $length_tol;
my $lp_tol_boost;
sub initialize_break_lists {
@dont_align = ();
@has_broken_sublist = ();
@want_comma_break = ();
#---------------------------------------------------
# Set tolerances to prevent formatting instabilities
#---------------------------------------------------
# Define tolerances to use when checking if closed
# containers will fit on one line. This is necessary to avoid
# formatting instability. The basic tolerance is based on the
# following:
# - Always allow for at least one extra space after a closing token so
# that we do not strand a comma or semicolon. (oneline.t).
# - Use an increased line length tolerance when -ci > -i to avoid
# blinking states (case b923 and others).
$length_tol =
1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
# In addition, it may be necessary to use a few extra tolerance spaces
# when -lp is used and/or when -xci is used. The history of this
# so far is as follows:
# FIX1: At least 3 characters were been found to be required for -lp
# to fixes cases b1059 b1063 b1117.
# FIX2: Further testing showed that we need a total of 3 extra spaces
# when -lp is set for non-lists, and at least 2 spaces when -lp and
# -xci are set.
# Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
# b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
# b1165
# FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
# 'find_token_starting_list' to go back before an initial blank space.
# This fixed these three cases, and allowed the tolerances to be
# reduced to continue to fix all other known cases of instability.
# This gives the current tolerance formulation.
$lp_tol_boost = 0;
if ($rOpts_line_up_parentheses) {
# boost tol for combination -lp -xci
if ($rOpts_extended_continuation_indentation) {
$lp_tol_boost = 2;
}
# boost tol for combination -lp and any -vtc > 0, but only for
# non-list containers
else {
foreach ( keys %closing_vertical_tightness ) {
next
unless ( $closing_vertical_tightness{$_} );
$lp_tol_boost = 1; # Fixes B1193;
last;
}
}
}
# Define a level where list formatting becomes highly stressed and
# needs to be simplified. Introduced for case b1262.
# $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
# This is now '$high_stress_level'.
return;
} ## end sub initialize_break_lists
# routine to define essential variables when we go 'up' to
# a new depth
sub check_for_new_minimum_depth {
my ( $self, $depth_t, $seqno ) = @_;
if ( $depth_t < $minimum_depth ) {
$minimum_depth = $depth_t;
# these arrays need not retain values between calls
my $old_seqno = $type_sequence_stack[$depth_t];
my $changed_seqno = !defined($old_seqno) || $old_seqno != $seqno;
$type_sequence_stack[$depth_t] = $seqno;
$override_cab3[$depth_t] = undef;
if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) {
$override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno};
}
$breakpoint_stack[$depth_t] = $starting_breakpoint_count;
$container_type[$depth_t] = EMPTY_STRING;
$identifier_count_stack[$depth_t] = 0;
$index_before_arrow[$depth_t] = -1;
$interrupted_list[$depth_t] = 1;
$item_count_stack[$depth_t] = 0;
$last_nonblank_type[$depth_t] = EMPTY_STRING;
$opening_structure_index_stack[$depth_t] = -1;
$breakpoint_undo_stack[$depth_t] = undef;
$comma_index[$depth_t] = undef;
$last_comma_index[$depth_t] = undef;
$last_dot_index[$depth_t] = undef;
$old_breakpoint_count_stack[$depth_t] = undef;
$has_old_logical_breakpoints[$depth_t] = 0;
$rand_or_list[$depth_t] = [];
$rfor_semicolon_list[$depth_t] = [];
$i_equals[$depth_t] = -1;
# these arrays must retain values between calls
if ( $changed_seqno || !defined( $has_broken_sublist[$depth_t] ) ) {
$dont_align[$depth_t] = 0;
$has_broken_sublist[$depth_t] = 0;
$want_comma_break[$depth_t] = 0;
}
}
return;
} ## end sub check_for_new_minimum_depth
# routine to decide which commas to break at within a container;
# returns:
# $bp_count = number of comma breakpoints set
# $do_not_break_apart = a flag indicating if container need not
# be broken open
sub set_comma_breakpoints {
my ( $self, $dd, $rbond_strength_bias ) = @_;
my $bp_count = 0;
my $do_not_break_apart = 0;
# anything to do?
if ( $item_count_stack[$dd] ) {
# Do not break a list unless there are some non-line-ending commas.
# This avoids getting different results with only non-essential
# commas, and fixes b1192.
my $seqno = $type_sequence_stack[$dd];
my $real_comma_count =
$seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
# handle commas not in containers...
if ( $dont_align[$dd] ) {
$self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
}
# handle commas within containers...
elsif ($real_comma_count) {
my $fbc = $forced_breakpoint_count;
# always open comma lists not preceded by keywords,
# barewords, identifiers (that is, anything that doesn't
# look like a function call)
# c250: added new sub identifier type 'S'
my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiUS]$/;
$self->table_maker(
{
depth => $dd,
i_opening_paren => $opening_structure_index_stack[$dd],
i_closing_paren => $i,
item_count => $item_count_stack[$dd],
identifier_count => $identifier_count_stack[$dd],
rcomma_index => $comma_index[$dd],
next_nonblank_type => $next_nonblank_type,
list_type => $container_type[$dd],
interrupted => $interrupted_list[$dd],
rdo_not_break_apart => \$do_not_break_apart,
must_break_open => $must_break_open,
has_broken_sublist => $has_broken_sublist[$dd],
}
);
$bp_count = $forced_breakpoint_count - $fbc;
$do_not_break_apart = 0 if $must_break_open;
}
else {
## no real commas, nothing to do
}
}
return ( $bp_count, $do_not_break_apart );
} ## end sub set_comma_breakpoints
# These types are excluded at breakpoints to prevent blinking
# Switched from excluded to included as part of fix for b1214
my %is_uncontained_comma_break_included_type;
BEGIN {
my @q = qw< k R } ) ] Y Z U w i q Q .
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
@is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
} ## end BEGIN
sub do_uncontained_comma_breaks {
# Handle commas not in containers...
# This is a catch-all routine for commas that we
# don't know what to do with because the don't fall
# within containers. We will bias the bond strength
# to break at commas which ended lines in the input
# file. This usually works better than just trying
# to put as many items on a line as possible. A
# downside is that if the input file is garbage it
# won't work very well. However, the user can always
# prevent following the old breakpoints with the
# -iob flag.
my ( $self, $dd, $rbond_strength_bias ) = @_;
# Check added for issue c131; an error here would be due to an
# error initializing @comma_index when entering depth $dd.
if (DEVEL_MODE) {
foreach my $ii ( @{ $comma_index[$dd] } ) {
if ( $ii < 0 || $ii > $max_index_to_go ) {
my $KK = $K_to_go[0];
my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
Fault(<<EOM);
Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
EOM
}
}
}
my $bias = -.01;
my $old_comma_break_count = 0;
foreach my $ii ( @{ $comma_index[$dd] } ) {
if ( $old_breakpoint_to_go[$ii] ) {
$old_comma_break_count++;
# Store the bias info for use by sub set_bond_strength
push @{$rbond_strength_bias}, [ $ii, $bias ];
# reduce bias magnitude to force breaks in order
$bias *= 0.99;
}
}
# Also put a break before the first comma if
# (1) there was a break there in the input, and
# (2) there was exactly one old break before the first comma break
# (3) OLD: there are multiple old comma breaks
# (3) NEW: there are one or more old comma breaks (see return example)
# (4) the first comma is at the starting level ...
# ... fixes cases b064 b065 b068 b210 b747
# (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
# ... fixes b1220. If ci>0 we are in the middle of a snippet,
# maybe because -boc has been forcing out previous lines.
# For example, we will follow the user and break after
# 'print' in this snippet:
# print
# "conformability (Not the same dimension)\n",
# "\t", $have, " is ", text_unit($hu), "\n",
# "\t", $want, " is ", text_unit($wu), "\n",
# ;
#
# Another example, just one comma, where we will break after
# the return:
# return
# $x * cos($a) - $y * sin($a),
# $x * sin($a) + $y * cos($a);
# Breaking a print statement:
# print SAVEOUT
# ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
# ( $? & 128 ) ? " -- core dumped" : "", "\n";
#
# But we will not force a break after the opening paren here
# (causes a blinker):
# $heap->{stream}->set_output_filter(
# poe::filter::reference->new('myotherfreezer') ),
# ;
#
my $i_first_comma = $comma_index[$dd]->[0];
my $level_comma = $levels_to_go[$i_first_comma];
my $ci_start = $ci_levels_to_go[0];
# Here we want to use the value of ci before any -xci adjustment
if ( $ci_start && $rOpts_extended_continuation_indentation ) {
my $K0 = $K_to_go[0];
if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
}
if ( !$ci_start
&& $old_breakpoint_to_go[$i_first_comma]
&& $level_comma == $levels_to_go[0] )
{
my $ibreak = -1;
my $obp_count = 0;
foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
if ( $old_breakpoint_to_go[$ii] ) {
$obp_count++;
last if ( $obp_count > 1 );
$ibreak = $ii
if ( $levels_to_go[$ii] == $level_comma );
}
}
# Changed rule from multiple old commas to just one here:
if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
{
my $ibreak_m = $ibreak;
$ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
if ( $ibreak_m >= 0 ) {
# In order to avoid blinkers we have to be fairly
# restrictive:
# OLD Rules:
# Rule 1: Do not to break before an opening token
# Rule 2: avoid breaking at ternary operators
# (see b931, which is similar to the above print example)
# Rule 3: Do not break at chain operators to fix case b1119
# - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
# NEW Rule, replaced above rules after case b1214:
# only break at one of the included types
# Be sure to test any changes to these rules against runs
# with -l=0 such as the 'bbvt' test (perltidyrc_colin)
# series.
my $type_m = $types_to_go[$ibreak_m];
# Switched from excluded to included for b1214. If necessary
# the token could also be checked if type_m eq 'k'
if ( $is_uncontained_comma_break_included_type{$type_m} ) {
# Rule added to fix b1449:
# Do not break before a '?' if -nbot is set
# Otherwise, we may alternately arrive here and
# set the break, or not, depending on the input.
my $no_break;
my $ibreak_p = $inext_to_go[$ibreak_m];
if ( !$rOpts_break_at_old_ternary_breakpoints
&& $ibreak_p <= $max_index_to_go )
{
my $type_p = $types_to_go[$ibreak_p];
$no_break = $type_p eq '?';
}
$self->set_forced_breakpoint($ibreak)
if ( !$no_break );
}
}
}
}
return;
} ## end sub do_uncontained_comma_breaks
my %is_logical_container;
my %quick_filter;
BEGIN {
my @q = qw# if elsif unless while and or err not && | || ? : ! #;
@is_logical_container{@q} = (1) x scalar(@q);
# This filter will allow most tokens to skip past a section of code
%quick_filter = %is_assignment;
@q = qw# => . ; < > ~ #;
push @q, ',';
push @q, 'f'; # added for ';' for issue c154
@quick_filter{@q} = (1) x scalar(@q);
} ## end BEGIN
sub set_for_semicolon_breakpoints {
my ( $self, $dd ) = @_;
# Set breakpoints for semicolons in C-style 'for' containers
foreach ( @{ $rfor_semicolon_list[$dd] } ) {
$self->set_forced_breakpoint($_);
}
return;
} ## end sub set_for_semicolon_breakpoints
sub set_logical_breakpoints {
my ( $self, $dd ) = @_;
# Set breakpoints at logical operators
if (
$item_count_stack[$dd] == 0
&& $is_logical_container{ $container_type[$dd] }
|| $has_old_logical_breakpoints[$dd]
)
{
# Look for breaks in this order:
# 0 1 2 3
# or and || &&
foreach my $i ( 0 .. 3 ) {
if ( $rand_or_list[$dd][$i] ) {
foreach ( @{ $rand_or_list[$dd][$i] } ) {
$self->set_forced_breakpoint($_);
}
# break at any 'if' and 'unless' too
foreach ( @{ $rand_or_list[$dd][4] } ) {
$self->set_forced_breakpoint($_);
}
$rand_or_list[$dd] = [];
last;
}
}
}
return;
} ## end sub set_logical_breakpoints
sub is_unbreakable_container {
# never break a container of one of these types
# because bad things can happen (map1.t)
my $dd = shift;
return $is_sort_map_grep{ $container_type[$dd] };
} ## end sub is_unbreakable_container
sub break_lists {
my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
#--------------------------------------------------------------------
# This routine is called once per batch, if the batch is a list, to
# set line breaks so that hierarchical structure can be displayed and
# so that list items can be vertically aligned. The output of this
# routine is stored in the array @forced_breakpoint_to_go, which is
# used by sub 'break_long_lines' to set final breakpoints. This is
# probably the most complex routine in perltidy, so I have
# broken it into pieces and over-commented it.
#--------------------------------------------------------------------
$starting_depth = $nesting_depth_to_go[0];
$block_type = SPACE;
$current_depth = $starting_depth;
$i = -1;
$i_last_colon = -1;
$i_line_end = -1;
$i_line_start = -1;
$last_nonblank_token = ';';
$last_nonblank_type = ';';
$last_nonblank_block_type = SPACE;
$last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; # forces update in check below
$old_breakpoint_count = 0;
$starting_breakpoint_count = $forced_breakpoint_count;
$token = ';';
$type = ';';
$type_sequence = EMPTY_STRING;
my $total_depth_variation = 0;
my $i_old_assignment_break;
my $depth_last = $starting_depth;
my $comma_follows_last_closing_token;
$self->check_for_new_minimum_depth( $current_depth,
$parent_seqno_to_go[0] )
if ( $current_depth < $minimum_depth );
my $i_want_previous_break = -1;
my $saw_good_breakpoint;
#----------------------------------------
# Main loop over all tokens in this batch
#----------------------------------------
while ( ++$i <= $max_index_to_go ) {
if ( $type ne 'b' ) {
$i_last_nonblank_token = $i - 1;
$last_nonblank_type = $type;
$last_nonblank_token = $token;
$last_nonblank_block_type = $block_type;
}
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
$type_sequence = $type_sequence_to_go[$i];
my $i_next_nonblank = $inext_to_go[$i];
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
$next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
#-------------------------------------------
# Loop Section A: Look for special breakpoints...
#-------------------------------------------
# set break if flag was set
if ( $i_want_previous_break >= 0 ) {
$self->set_forced_breakpoint($i_want_previous_break);
$i_want_previous_break = -1;
}
$last_old_breakpoint_count = $old_breakpoint_count;
# Check for a good old breakpoint ..
if ( $old_breakpoint_to_go[$i] ) {
( $i_want_previous_break, $i_old_assignment_break ) =
$self->examine_old_breakpoint( $i_next_nonblank,
$i_want_previous_break, $i_old_assignment_break );
}
next if ( $type eq 'b' );
$depth = $nesting_depth_to_go[ $i + 1 ];
$total_depth_variation += abs( $depth - $depth_last );
$depth_last = $depth;
# safety check - be sure we always break after a comment
# Shouldn't happen .. an error here probably means that the
# nobreak flag did not get turned off correctly during
# formatting.
if ( $type eq '#' ) {
if ( $i != $max_index_to_go ) {
if (DEVEL_MODE) {
Fault(<<EOM);
Non-fatal program bug: backup logic required to break after a comment
EOM
}
$nobreak_to_go[$i] = 0;
$self->set_forced_breakpoint($i);
} ## end if ( $i != $max_index_to_go)
} ## end if ( $type eq '#' )
# Force breakpoints at certain tokens in long lines.
# Note that such breakpoints will be undone later if these tokens
# are fully contained within parens on a line.
if (
# break before a keyword within a line
$type eq 'k'
&& $i > 0
# if one of these keywords:
&& $is_if_unless_while_until_for_foreach{$token}
# but do not break at something like '1 while'
&& ( $last_nonblank_type ne 'n' || $i > 2 )
# and let keywords follow a closing 'do' brace
&& ( !$last_nonblank_block_type
|| $last_nonblank_block_type ne 'do' )
&& (
$is_long_line
# or container is broken (by side-comment, etc)
|| (
$next_nonblank_token eq '('
&& ( !defined( $mate_index_to_go[$i_next_nonblank] )
|| $mate_index_to_go[$i_next_nonblank] < $i )
)
)
)
{
$self->set_forced_breakpoint( $i - 1 );
}
# remember locations of '||' and '&&' for possible breaks if we
# decide this is a long logical expression.
if ( $type eq '||' ) {
push @{ $rand_or_list[$depth][2] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $type eq '&&' ) {
push @{ $rand_or_list[$depth][3] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $type eq 'f' ) {
push @{ $rfor_semicolon_list[$depth] }, $i;
}
elsif ( $type eq 'k' ) {
if ( $token eq 'and' ) {
push @{ $rand_or_list[$depth][1] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
# break immediately at 'or's which are probably not in a logical
# block -- but we will break in logical breaks below so that
# they do not add to the forced_breakpoint_count
elsif ( $token eq 'or' ) {
push @{ $rand_or_list[$depth][0] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
if ( $is_logical_container{ $container_type[$depth] } ) {
}
else {
if ($is_long_line) { $self->set_forced_breakpoint($i) }
elsif ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$saw_good_breakpoint = 1;
}
else {
## not a good break
}
}
}
elsif ( $token eq 'if' || $token eq 'unless' ) {
push @{ $rand_or_list[$depth][4] }, $i;
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$self->set_forced_breakpoint($i);
}
}
else {
## not one of: 'and' 'or' 'if' 'unless'
}
}
elsif ( $is_assignment{$type} ) {
$i_equals[$depth] = $i;
}
else {
## not a good breakpoint type
}
#-----------------------------------------
# Loop Section B: Handle a sequenced token
#-----------------------------------------
if ($type_sequence) {
$self->break_lists_type_sequence;
}
#------------------------------------------
# Loop Section C: Handle Increasing Depth..
#------------------------------------------
# hardened against bad input syntax: depth jump must be 1 and type
# must be opening..fixes c102
if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
$self->break_lists_increasing_depth();
}
#------------------------------------------
# Loop Section D: Handle Decreasing Depth..
#------------------------------------------
# hardened against bad input syntax: depth jump must be 1 and type
# must be closing .. fixes c102
elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
$self->break_lists_decreasing_depth();
$comma_follows_last_closing_token =
$next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
}
else {
## not a depth change
}
#----------------------------------
# Loop Section E: Handle this token
#----------------------------------
$current_depth = $depth;
# most token types can skip the rest of this loop
next if ( !$quick_filter{$type} );
# Turn off comma alignment if we are sure that this is not a list
# environment. To be safe, we will do this if we see certain
# non-list tokens, such as ';', '=', and also the environment is
# not a list.
## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
if ( $is_non_list_type{$type} ) {
if ( !$self->is_in_list_by_i($i) ) {
$dont_align[$depth] = 1;
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# no special comma breaks in C-style 'for' terms (c154)
if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
}
}
# handle any commas
elsif ( $type eq ',' ) {
$self->study_comma($comma_follows_last_closing_token);
}
# handle comma-arrow
elsif ( $type eq '=>' ) {
next if ( $last_nonblank_type eq '=>' );
next if $rOpts_break_at_old_comma_breakpoints;
next
if ( $rOpts_comma_arrow_breakpoints == 3
&& !defined( $override_cab3[$depth] ) );
$want_comma_break[$depth] = 1;
$index_before_arrow[$depth] = $i_last_nonblank_token;
next;
}
elsif ( $type eq '.' ) {
$last_dot_index[$depth] = $i;
}
else {
# error : no code to handle a type in %quick_filter
DEVEL_MODE && Fault(<<EOM);
Missing code to handle token type '$type' which is in the quick_filter
EOM
}
} ## end while ( ++$i <= $max_index_to_go)
#-------------------------------------------
# END of loop over all tokens in this batch
# Now set breaks for any unfinished lists ..
#-------------------------------------------
foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
$interrupted_list[$dd] = 1;
$has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
$self->set_comma_breakpoints( $dd, $rbond_strength_bias )
if ( $item_count_stack[$dd] );
$self->set_logical_breakpoints($dd)
if ( $has_old_logical_breakpoints[$dd] );
$self->set_for_semicolon_breakpoints($dd);
# break open container...
my $i_opening = $opening_structure_index_stack[$dd];
if ( defined($i_opening) && $i_opening >= 0 ) {
my $no_break = (
is_unbreakable_container($dd)
# Avoid a break which would place an isolated ' or "
# on a line
|| ( $type eq 'Q'
&& $i_opening >= $max_index_to_go - 2
&& ( $token eq "'" || $token eq '"' ) )
);
$self->set_forced_breakpoint($i_opening)
if ( !$no_break );
}
} ## end for ( my $dd = $current_depth...)
#----------------------------------------
# Return the flag '$saw_good_breakpoint'.
#----------------------------------------
# This indicates if the input file had some good breakpoints. This
# flag will be used to force a break in a line shorter than the
# allowed line length.
if ( $has_old_logical_breakpoints[$current_depth] ) {
$saw_good_breakpoint = 1;
}
# A complex line with one break at an = has a good breakpoint.
# This is not complex ($total_depth_variation=0):
# $res1
# = 10;
#
# This is complex ($total_depth_variation=6):
# $res2 =
# (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
# The check ($i_old_.. < $max_index_to_go) was added to fix b1333
elsif ($i_old_assignment_break
&& $total_depth_variation > 4
&& $old_breakpoint_count == 1
&& $i_old_assignment_break < $max_index_to_go )
{
$saw_good_breakpoint = 1;
}
else {
## not a good breakpoint
}
return $saw_good_breakpoint;
} ## end sub break_lists
sub study_comma {
# study and store info for a list comma
my ( $self, $comma_follows_last_closing_token ) = @_;
$last_dot_index[$depth] = undef;
$last_comma_index[$depth] = $i;
# break here if this comma follows a '=>'
# but not if there is a side comment after the comma
if ( $want_comma_break[$depth] ) {
if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
if ($rOpts_comma_arrow_breakpoints) {
$want_comma_break[$depth] = 0;
return;
}
}
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
# break before the previous token if it looks safe
# Example of something that we will not try to break before:
# DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
# Also we don't want to break at a binary operator (like +):
# $c->createOval(
# $x + $R, $y +
# $R => $x - $R,
# $y - $R, -fill => 'black',
# );
my $ibreak = $index_before_arrow[$depth] - 1;
if ( $ibreak > 0
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
# don't break before a comma, as in the following:
# ( LONGER_THAN,=> 1,
# EIGHTY_CHARACTERS,=> 2,
# CAUSES_FORMATTING,=> 3,
# LIKE_THIS,=> 4,
# );
# This example is for -tso but should be general rule
if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
&& $tokens_to_go[ $ibreak + 1 ] ne ',' )
{
$self->set_forced_breakpoint($ibreak);
}
}
}
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# handle list which mixes '=>'s and ','s:
# treat any list items so far as an interrupted list
$interrupted_list[$depth] = 1;
return;
}
# Break after all commas above starting depth...
# But only if the last closing token was followed by a comma,
# to avoid breaking a list operator (issue c119)
if ( $depth < $starting_depth
&& $comma_follows_last_closing_token
&& !$dont_align[$depth] )
{
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
return;
}
# add this comma to the list..
my $item_count = $item_count_stack[$depth];
if ( $item_count == 0 ) {
# but do not form a list with no opening structure
# for example:
# open INFILE_COPY, ">$input_file_copy"
# or die ("very long message");
if ( ( $opening_structure_index_stack[$depth] < 0 )
&& $self->is_in_block_by_i($i) )
{
$dont_align[$depth] = 1;
}
}
$comma_index[$depth][$item_count] = $i;
++$item_count_stack[$depth];
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
$identifier_count_stack[$depth]++;
}
return;
} ## end sub study_comma
my %poor_types;
my %poor_keywords;
my %poor_next_types;
my %poor_next_keywords;
BEGIN {
# Setup filters for detecting very poor breaks to ignore.
# b1097: old breaks after type 'L' and before 'R' are poor
# b1450: old breaks at 'eq' and related operators are poor
my @q = qw(== <= >= !=);
@{poor_types}{@q} = (1) x scalar(@q);
@{poor_next_types}{@q} = (1) x scalar(@q);
$poor_types{'L'} = 1;
$poor_next_types{'R'} = 1;
@q = qw(eq ne le ge lt gt);
@{poor_keywords}{@q} = (1) x scalar(@q);
@{poor_next_keywords}{@q} = (1) x scalar(@q);
} ## end BEGIN
sub examine_old_breakpoint {
my ( $self, $i_next_nonblank, $i_want_previous_break,
$i_old_assignment_break )
= @_;
# Look at an old breakpoint and set/update certain flags:
# Given indexes of three tokens in this batch:
# $i_next_nonblank - index of the next nonblank token
# $i_want_previous_break - we want a break before this index
# $i_old_assignment_break - the index of an '=' or equivalent
# Update:
# $old_breakpoint_count - a counter to increment unless poor break
# Update and return:
# $i_want_previous_break
# $i_old_assignment_break
#-----------------------
# Filter out poor breaks
#-----------------------
# Just return if this is a poor break and pretend it does not exist.
# Otherwise, poor breaks made under stress can cause instability.
my $poor_break;
if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} }
else { $poor_break ||= $poor_types{$type} }
if ( $next_nonblank_type eq 'k' ) {
$poor_break ||= $poor_next_keywords{$next_nonblank_token};
}
else { $poor_break ||= $poor_next_types{$next_nonblank_type} }
# Also ignore any high stress level breaks; fixes b1395
$poor_break ||= $levels_to_go[$i] >= $high_stress_level;
if ($poor_break) { goto RETURN }
#--------------------------------------------
# Not a poor break, so continue to examine it
#--------------------------------------------
$old_breakpoint_count++;
$i_line_end = $i;
$i_line_start = $i_next_nonblank;
#---------------------------------------
# Do we want to break before this token?
#---------------------------------------
# Break before certain keywords if user broke there and
# this is a 'safe' break point. The idea is to retain
# any preferred breaks for sequential list operations,
# like a schwartzian transform.
if ($rOpts_break_at_old_keyword_breakpoints) {
if (
$next_nonblank_type eq 'k'
&& $is_keyword_returning_list{$next_nonblank_token}
&& ( $type =~ /^[=\)\]\}Riw]$/
|| $type eq 'k' && $is_keyword_returning_list{$token} )
)
{
# we actually have to set this break next time through
# the loop because if we are at a closing token (such
# as '}') which forms a one-line block, this break might
# get undone.
# But do not do this at an '=' if:
# - the user wants breaks before an equals (b434 b903)
# - or -naws is set (can be unstable, see b1354)
my $skip = $type eq '='
&& ( $want_break_before{$type}
|| !$rOpts_add_whitespace );
$i_want_previous_break = $i
unless ($skip);
}
}
# Break before attributes if user broke there
if ($rOpts_break_at_old_attribute_breakpoints) {
if ( $next_nonblank_type eq 'A' ) {
$i_want_previous_break = $i;
}
}
#---------------------------------
# Is this an old assignment break?
#---------------------------------
if ( $is_assignment{$type} ) {
$i_old_assignment_break = $i;
}
elsif ( $is_assignment{$next_nonblank_type} ) {
$i_old_assignment_break = $i_next_nonblank;
}
else {
## not old assignment break
}
RETURN:
return ( $i_want_previous_break, $i_old_assignment_break );
} ## end sub examine_old_breakpoint
sub break_lists_type_sequence {
my ($self) = @_;
# We have encountered a sequenced token while setting list breakpoints
# if closing type, one of } ) ] :
if ( $is_closing_sequence_token{$token} ) {
if ( $type eq ':' ) {
$i_last_colon = $i;
# retain break at a ':' line break
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_ternary_breakpoints
&& $levels_to_go[$i] < $high_stress_level )
{
$self->set_forced_breakpoint($i);
# Break at a previous '=', but only if it is before
# the mating '?'. Mate_index test fixes b1287.
my $ieq = $i_equals[$depth];
my $mix = $mate_index_to_go[$i];
if ( !defined($mix) ) { $mix = -1 }
if ( $ieq > 0 && $ieq < $mix ) {
$self->set_forced_breakpoint( $i_equals[$depth] );
$i_equals[$depth] = -1;
}
}
}
# handle any postponed closing breakpoints
if ( has_postponed_breakpoint($type_sequence) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
if ( $i >= $inc ) {
$self->set_forced_breakpoint( $i - $inc );
}
}
}
# must be opening token, one of { ( [ ?
else {
# set breaks at ?/: if they will get separated (and are
# not a ?/: chain), or if the '?' is at the end of the
# line
if ( $token eq '?' ) {
my $i_colon = $mate_index_to_go[$i];
if (
!defined($i_colon) # the ':' is not in this batch
|| $i == 0 # this '?' is the first token of the line
|| $i == $max_index_to_go # or this '?' is the last token
)
{
# don't break if # this has a side comment, and
# don't break at a '?' if preceded by ':' on
# this line of previous ?/: pair on this line.
# This is an attempt to preserve a chain of ?/:
# expressions (elsif2.t).
if (
(
$i_last_colon < 0
|| $parent_seqno_to_go[$i_last_colon] !=
$parent_seqno_to_go[$i]
)
&& $tokens_to_go[$max_index_to_go] ne '#'
)
{
$self->set_forced_breakpoint($i);
}
$self->set_closing_breakpoint($i);
}
}
# must be one of { ( [
else {
# do requested -lp breaks at the OPENING token for BROKEN
# blocks. NOTE: this can be done for both -lp and -xlp,
# but only -xlp can really take advantage of this. So this
# is currently restricted to -xlp to avoid excess changes to
# existing -lp formatting.
if ( $rOpts_extended_line_up_parentheses
&& !defined( $mate_index_to_go[$i] ) )
{
my $lp_object =
$self->[_rlp_object_by_seqno_]->{$type_sequence};
if ($lp_object) {
my $K_begin_line = $lp_object->get_K_begin_line();
my $i_begin_line = $K_begin_line - $K_to_go[0];
$self->set_forced_lp_break( $i_begin_line, $i );
}
}
}
}
return;
} ## end sub break_lists_type_sequence
sub break_lists_increasing_depth {
my ($self) = @_;
#--------------------------------------------
# prepare for a new list when depth increases
# token $i is a '(','{', or '['
#--------------------------------------------
#----------------------------------------------------------
# BEGIN initialize depth arrays
# ... use the same order as sub check_for_new_minimum_depth
#----------------------------------------------------------
$type_sequence_stack[$depth] = $type_sequence;
$override_cab3[$depth] = undef;
if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) {
$override_cab3[$depth] =
$self->[_roverride_cab3_]->{$type_sequence};
}
$breakpoint_stack[$depth] = $forced_breakpoint_count;
$container_type[$depth] =
# k => && || ? : .
$is_container_label_type{$last_nonblank_type}
? $last_nonblank_token
: EMPTY_STRING;
$identifier_count_stack[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 0;
$item_count_stack[$depth] = 0;
$last_nonblank_type[$depth] = $last_nonblank_type;
$opening_structure_index_stack[$depth] = $i;
$breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
$comma_index[$depth] = undef;
$last_comma_index[$depth] = undef;
$last_dot_index[$depth] = undef;
$old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
$has_old_logical_breakpoints[$depth] = 0;
$rand_or_list[$depth] = [];
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
# if line ends here then signal closing token to break
if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
$self->set_closing_breakpoint($i);
}
# Not all lists of values should be vertically aligned..
$dont_align[$depth] =
# code BLOCKS are handled at a higher level
##( $block_type ne EMPTY_STRING )
$block_type
# certain paren lists
|| ( $type eq '(' ) && (
# it does not usually look good to align a list of
# identifiers in a parameter list, as in:
# my($var1, $var2, ...)
# (This test should probably be refined, for now I'm just
# testing for any keyword)
( $last_nonblank_type eq 'k' )
# a trailing '(' usually indicates a non-list
|| ( $next_nonblank_type eq '(' )
);
$has_broken_sublist[$depth] = 0;
$want_comma_break[$depth] = 0;
#----------------------------
# END initialize depth arrays
#----------------------------
# patch to outdent opening brace of long if/for/..
# statements (like this one). See similar coding in
# set_continuation breaks. We have also catch it here for
# short line fragments which otherwise will not go through
# break_long_lines.
if (
$block_type
# if we have the ')' but not its '(' in this batch..
&& ( $last_nonblank_token eq ')' )
&& !defined( $mate_index_to_go[$i_last_nonblank_token] )
# and user wants brace to left
&& !$rOpts_opening_brace_always_on_right
&& ( $type eq '{' ) # should be true
&& ( $token eq '{' ) # should be true
)
{
$self->set_forced_breakpoint( $i - 1 );
}
return;
} ## end sub break_lists_increasing_depth
sub break_lists_decreasing_depth {
my ( $self, $rbond_strength_bias ) = @_;
# We have arrived at a closing container token in sub break_lists:
# the token at index $i is one of these: ')','}', ']'
# A number of important breakpoints for this container can now be set
# based on the information that we have collected. This includes:
# - breaks at commas to format tables
# - breaks at certain logical operators and other good breakpoints
# - breaks at opening and closing containers if needed by selected
# formatting styles
# These breaks are made by calling sub 'set_forced_breakpoint'
$self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
if ( $depth < $minimum_depth );
# force all outer logical containers to break after we see on
# old breakpoint
$has_old_logical_breakpoints[$depth] ||=
$has_old_logical_breakpoints[$current_depth];
# Patch to break between ') {' if the paren list is broken.
# There is similar logic in break_long_lines for
# non-broken lists.
if ( $token eq ')'
&& $next_nonblank_block_type
&& $interrupted_list[$current_depth]
&& $next_nonblank_type eq '{'
&& !$rOpts_opening_brace_always_on_right )
{
$self->set_forced_breakpoint($i);
}
#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
#-----------------------------------------------------------------
# Set breaks at commas to display a table of values if appropriate
#-----------------------------------------------------------------
my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
( $bp_count, $do_not_break_apart ) =
$self->set_comma_breakpoints( $current_depth, $rbond_strength_bias )
if ( $item_count_stack[$current_depth] );
#-----------------------------------------------------------
# Now set flags needed to decide if we should break open the
# container ... This is a long rambling section which has
# grown over time to handle all situations.
#-----------------------------------------------------------
my $i_opening = $opening_structure_index_stack[$current_depth];
my $saw_opening_structure = ( $i_opening >= 0 );
my $lp_object;
if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
$lp_object = $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$i_opening] };
}
# this term is long if we had to break at interior commas..
my $is_long_term = $bp_count > 0;
# If this is a short container with one or more comma arrows,
# then we will mark it as a long term to open it if requested.
# $rOpts_comma_arrow_breakpoints =
# 0 - open only if comma precedes closing brace
# 1 - stable: except for one line blocks
# 2 - try to form 1 line blocks
# 3 - ignore =>
# 4 - always open up if vt=0
# 5 - stable: even for one line blocks if vt=0
my $cab_flag = $rOpts_comma_arrow_breakpoints;
# replace -cab=3 if overriden
if ( $cab_flag == 3 && $type_sequence ) {
my $test_cab = $self->[_roverride_cab3_]->{$type_sequence};
if ( defined($test_cab) ) { $cab_flag = $test_cab }
}
# PATCH: Modify the -cab flag if we are not processing a list:
# We only want the -cab flag to apply to list containers, so
# for non-lists we use the default and stable -cab=5 value.
# Fixes case b939a.
if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
{
$cab_flag = 5;
}
# Ignore old breakpoints when under stress.
# Fixes b1203 b1204 as well as b1197-b1200.
# But not if -lp: fixes b1264, b1265. NOTE: rechecked with
# b1264 to see if this check is still required at all, and
# these still require a check, but at higher level beta+3
# instead of beta: b1193 b780
if ( $saw_opening_structure
&& !$lp_object
&& $levels_to_go[$i_opening] >= $high_stress_level )
{
$cab_flag = 2;
# Do not break hash braces under stress (fixes b1238)
$do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
# This option fixes b1235, b1237, b1240 with old and new
# -lp, but formatting is nicer with next option.
## $is_long_term ||=
## $levels_to_go[$i_opening] > $stress_level_beta + 1;
# This option fixes b1240 but not b1235, b1237 with new -lp,
# but this gives better formatting than the previous option.
# TODO: see if stress_level_alpha should also be considered
$do_not_break_apart ||=
$levels_to_go[$i_opening] > $stress_level_beta;
}
if ( !$is_long_term
&& $saw_opening_structure
&& $is_opening_token{ $tokens_to_go[$i_opening] }
&& $index_before_arrow[ $depth + 1 ] > 0
&& !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
{
$is_long_term =
$cab_flag == 4
|| $cab_flag == 0 && $last_nonblank_token eq ','
|| $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
}
# mark term as long if the length between opening and closing
# parens exceeds allowed line length
if ( !$is_long_term && $saw_opening_structure ) {
my $i_opening_minus = $self->find_token_starting_list($i_opening);
my $excess = $self->excess_line_length( $i_opening_minus, $i );
# Use standard spaces for indentation of lists in -lp mode
# if it gives a longer line length. This helps to avoid an
# instability due to forming and breaking one-line blocks.
# This fixes case b1314.
my $indentation = $leading_spaces_to_go[$i_opening_minus];
if ( ref($indentation)
&& $self->[_ris_broken_container_]->{$type_sequence} )
{
my $lp_spaces = $indentation->get_spaces();
my $std_spaces = $indentation->get_standard_spaces();
my $diff = $std_spaces - $lp_spaces;
if ( $diff > 0 ) { $excess += $diff }
}
my $tol = $length_tol;
# boost tol for an -lp container
if (
$lp_tol_boost
&& $lp_object
&& ( $rOpts_extended_continuation_indentation
|| !$self->[_ris_list_by_seqno_]->{$type_sequence} )
)
{
$tol += $lp_tol_boost;
}
# Patch to avoid blinking with -bbxi=2 and -cab=2
# in which variations in -ci cause unstable formatting
# in edge cases. We just always add one ci level so that
# the formatting is independent of the -BBX results.
# Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
# b1161 b1166 b1167 b1168
if ( !$ci_levels_to_go[$i_opening]
&& $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
)
{
$tol += $rOpts_continuation_indentation;
}
$is_long_term = $excess + $tol > 0;
}
# We've set breaks after all comma-arrows. Now we have to
# undo them if this can be a one-line block
# (the only breakpoints set will be due to comma-arrows)
if (
# user doesn't require breaking after all comma-arrows
( $cab_flag != 0 ) && ( $cab_flag != 4 )
# and if the opening structure is in this batch
&& $saw_opening_structure
# and either on the same old line
&& (
$old_breakpoint_count_stack[$current_depth] ==
$last_old_breakpoint_count
# or user wants to form long blocks with arrows
|| $cab_flag == 2
)
# and we made breakpoints between the opening and closing
&& ( $breakpoint_undo_stack[$current_depth] <
$forced_breakpoint_undo_count )
# and this block is short enough to fit on one line
# Note: use < because need 1 more space for possible comma
&& !$is_long_term
)
{
$self->undo_forced_breakpoint_stack(
$breakpoint_undo_stack[$current_depth] );
}
# now see if we have any comma breakpoints left
my $has_comma_breakpoints =
( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
# update broken-sublist flag of the outer container
$has_broken_sublist[$depth] =
$has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
# Having come to the closing ')', '}', or ']', now we have to decide
# if we should 'open up' the structure by placing breaks at the
# opening and closing containers. This is a tricky decision. Here
# are some of the basic considerations:
#
# -If this is a BLOCK container, then any breakpoints will have
# already been set (and according to user preferences), so we need do
# nothing here.
#
# -If we have a comma-separated list for which we can align the list
# items, then we need to do so because otherwise the vertical aligner
# cannot currently do the alignment.
#
# -If this container does itself contain a container which has been
# broken open, then it should be broken open to properly show the
# structure.
#
# -If there is nothing to align, and no other reason to break apart,
# then do not do it.
#
# We will not break open the parens of a long but 'simple' logical
# expression. For example:
#
# This is an example of a simple logical expression and its formatting:
#
# if ( $bigwasteofspace1 && $bigwasteofspace2
# || $bigwasteofspace3 && $bigwasteofspace4 )
#
# Most people would prefer this than the 'spacey' version:
#
# if (
# $bigwasteofspace1 && $bigwasteofspace2
# || $bigwasteofspace3 && $bigwasteofspace4
# )
#
# To illustrate the rules for breaking logical expressions, consider:
#
# FULLY DENSE:
# if ( $opt_excl
# and ( exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc ))
#
# This is on the verge of being difficult to read. The current
# default is to open it up like this:
#
# DEFAULT:
# if (
# $opt_excl
# and ( exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc )
# )
#
# This is a compromise which tries to avoid being too dense and to
# spacey. A more spaced version would be:
#
# SPACEY:
# if (
# $opt_excl
# and (
# exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc
# )
# )
#
# Some people might prefer the spacey version -- an option could be
# added. The innermost expression contains a long block '( exists
# $ids_... ')'.
#
# Here is how the logic goes: We will force a break at the 'or' that
# the innermost expression contains, but we will not break apart its
# opening and closing containers because (1) it contains no
# multi-line sub-containers itself, and (2) there is no alignment to
# be gained by breaking it open like this
#
# and (
# exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc
# )
#
# (although this looks perfectly ok and might be good for long
# expressions). The outer 'if' container, though, contains a broken
# sub-container, so it will be broken open to avoid too much density.
# Also, since it contains no 'or's, there will be a forced break at
# its 'and'.
# Handle the experimental flag --break-open-compact-parens
# NOTE: This flag is not currently used and may eventually be removed.
# If this flag is set, we will implement it by
# pretending we did not see the opening structure, since in that case
# parens always get opened up.
if ( $saw_opening_structure
&& $rOpts_break_open_compact_parens )
{
# This parameter is a one-character flag, as follows:
# '0' matches no parens -> break open NOT OK
# '1' matches all parens -> break open OK
# Other values are same as used by the weld-exclusion-list
my $flag = $rOpts_break_open_compact_parens;
if ( $flag eq '*'
|| $flag eq '1' )
{
$saw_opening_structure = 0;
}
else {
# NOTE: $seqno will be equal to closure var $type_sequence here
my $seqno = $type_sequence_to_go[$i_opening];
$saw_opening_structure =
!$self->match_paren_control_flag( $seqno, $flag );
}
}
# Set some more flags telling something about this container..
my $is_simple_logical_expression;
if ( $item_count_stack[$current_depth] == 0
&& $saw_opening_structure
&& $tokens_to_go[$i_opening] eq '('
&& $is_logical_container{ $container_type[$current_depth] } )
{
# This seems to be a simple logical expression with
# no existing breakpoints. Set a flag to prevent
# opening it up.
if ( !$has_comma_breakpoints ) {
$is_simple_logical_expression = 1;
}
#---------------------------------------------------
# This seems to be a simple logical expression with
# breakpoints (broken sublists, for example). Break
# at all 'or's and '||'s.
#---------------------------------------------------
else {
$self->set_logical_breakpoints($current_depth);
}
}
# break long terms at any C-style for semicolons (c154)
if ( $is_long_term
&& @{ $rfor_semicolon_list[$current_depth] } )
{
$self->set_for_semicolon_breakpoints($current_depth);
# and open up a long 'for' or 'foreach' container to allow
# leading term alignment unless -lp is used.
$has_comma_breakpoints = 1 unless ($lp_object);
}
#----------------------------------------------------------------
# FINALLY: Break open container according to the flags which have
# been set.
#----------------------------------------------------------------
if (
# breaks for code BLOCKS are handled at a higher level
!$block_type
# we do not need to break at the top level of an 'if'
# type expression
&& !$is_simple_logical_expression
## modification to keep ': (' containers vertically tight;
## but probably better to let user set -vt=1 to avoid
## inconsistency with other paren types
## && ($container_type[$current_depth] ne ':')
# otherwise, we require one of these reasons for breaking:
&& (
# - this term has forced line breaks
$has_comma_breakpoints
# - the opening container is separated from this batch
# for some reason (comment, blank line, code block)
# - this is a non-paren container spanning multiple lines
|| !$saw_opening_structure
# - this is a long block contained in another breakable
# container
|| $is_long_term && !$self->is_in_block_by_i($i_opening)
)
)
{
# do special -lp breaks at the CLOSING token for INTACT
# blocks (because we might not do them if the block does
# not break open)
if ($lp_object) {
my $K_begin_line = $lp_object->get_K_begin_line();
my $i_begin_line = $K_begin_line - $K_to_go[0];
$self->set_forced_lp_break( $i_begin_line, $i_opening );
}
# break after opening structure.
# note: break before closing structure will be automatic
if ( $minimum_depth <= $current_depth ) {
if ( $i_opening >= 0 ) {
if ( !$do_not_break_apart
&& !is_unbreakable_container($current_depth) )
{
$self->set_forced_breakpoint($i_opening);
# Do not let brace types L/R use vertical tightness
# flags to recombine if we have to break on length
# because instability is possible if both vt and vtc
# flags are set ... see issue b1444.
if ( $is_long_term
&& $types_to_go[$i_opening] eq 'L'
&& $opening_vertical_tightness{'{'}
&& $closing_vertical_tightness{'}'} )
{
my $seqno = $type_sequence_to_go[$i_opening];
if ($seqno) {
$self->[_rbreak_container_]->{$seqno} = 1;
}
}
}
}
# break at ',' of lower depth level before opening token
if ( $last_comma_index[$depth] ) {
$self->set_forced_breakpoint( $last_comma_index[$depth] );
}
# break at '.' of lower depth level before opening token
if ( $last_dot_index[$depth] ) {
$self->set_forced_breakpoint( $last_dot_index[$depth] );
}
# break before opening structure if preceded by another
# closing structure and a comma. This is normally
# done by the previous closing brace, but not
# if it was a one-line block.
if ( $i_opening > 2 ) {
my $i_prev =
( $types_to_go[ $i_opening - 1 ] eq 'b' )
? $i_opening - 2
: $i_opening - 1;
my $type_prev = $types_to_go[$i_prev];
my $token_prev = $tokens_to_go[$i_prev];
if (
$type_prev eq ','
&& ( $types_to_go[ $i_prev - 1 ] eq ')'
|| $types_to_go[ $i_prev - 1 ] eq '}' )
)
{
$self->set_forced_breakpoint($i_prev);
}
# also break before something like ':(' or '?('
# if appropriate.
elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
&& $want_break_before{$token_prev} )
{
$self->set_forced_breakpoint($i_prev);
}
else {
## not a breakpoint
}
}
}
# break after comma following closing structure
if ( $types_to_go[ $i + 1 ] eq ',' ) {
$self->set_forced_breakpoint( $i + 1 );
}
# break before an '=' following closing structure
if (
$is_assignment{$next_nonblank_type}
&& ( $breakpoint_stack[$current_depth] !=
$forced_breakpoint_count )
)
{
$self->set_forced_breakpoint($i);
}
# break at any comma before the opening structure Added
# for -lp, but seems to be good in general. It isn't
# obvious how far back to look; the '5' below seems to
# work well and will catch the comma in something like
# push @list, myfunc( $param, $param, ..
my $icomma = $last_comma_index[$depth];
if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
if ( !$forced_breakpoint_to_go[$icomma] ) {
$self->set_forced_breakpoint($icomma);
}
}
}
#-----------------------------------------------------------
# Break open a logical container open if it was already open
#-----------------------------------------------------------
elsif ($is_simple_logical_expression
&& $has_old_logical_breakpoints[$current_depth] )
{
$self->set_logical_breakpoints($current_depth);
}
# Handle long container which does not get opened up
elsif ($is_long_term) {
# must set fake breakpoint to alert outer containers that
# they are complex
set_fake_breakpoint();
}
else {
## do not break open
}
return;
} ## end sub break_lists_decreasing_depth
} ## end closure break_lists
my %is_kwiZ;
my %is_key_type;
BEGIN {
# Added 'w' to fix b1172
my @q = qw(k w i Z ->);
@is_kwiZ{@q} = (1) x scalar(@q);
# added = for b1211
@q = qw<( [ { L R } ] ) = b>;
push @q, ',';
@is_key_type{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_FIND_START => 0;
sub find_token_starting_list {
# When testing to see if a block will fit on one line, some
# previous token(s) may also need to be on the line; particularly
# if this is a sub call. So we will look back at least one
# token.
my ( $self, $i_opening_paren ) = @_;
# This will be the return index
my $i_opening_minus = $i_opening_paren;
if ( $i_opening_minus <= 0 ) {
return $i_opening_minus;
}
my $im1 = $i_opening_paren - 1;
my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
$iprev_nb -= 1;
$type_prev_nb = $types_to_go[$iprev_nb];
}
if ( $type_prev_nb eq ',' ) {
# a previous comma is a good break point
# $i_opening_minus = $i_opening_paren;
}
elsif (
$tokens_to_go[$i_opening_paren] eq '('
# non-parens added here to fix case b1186
|| $is_kwiZ{$type_prev_nb}
)
{
$i_opening_minus = $im1;
# Walk back to improve length estimate...
# FIX for cases b1169 b1170 b1171: start walking back
# at the previous nonblank. This makes the result insensitive
# to the flag --space-function-paren, and similar.
# previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
if ( $is_key_type{ $types_to_go[$j] } ) {
# fix for b1211
if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
last;
}
$i_opening_minus = $j;
}
if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
}
else {
## previous token not special
}
DEBUG_FIND_START && print <<EOM;
FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
EOM
return $i_opening_minus;
} ## end sub find_token_starting_list
{ ## begin closure table_maker
my %is_keyword_with_special_leading_term;
BEGIN {
# These keywords have prototypes which allow a special leading item
# followed by a list
my @q = qw(
chmod
formline
grep
join
kill
map
pack
printf
push
sprintf
unshift
);
@is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_SPARSE => 0;
sub table_maker {
# Given a list of comma-separated items, set breakpoints at some of
# the commas, if necessary, to make it easy to read.
# This is done by making calls to 'set_forced_breakpoint'.
# This is a complex routine because there are many special cases.
# Returns: nothing
# The numerous variables involved are contained three hashes:
# $rhash_IN : For contents see the calling routine
# $rhash_A: For contents see return from sub 'table_layout_A'
# $rhash_B: For contents see return from sub 'table_layout_B'
my ( $self, $rhash_IN ) = @_;
# Find lengths of all list items needed for calculating page layout
my $rhash_A = table_layout_A($rhash_IN);
return if ( !defined($rhash_A) );
# Some variables received from caller...
my $i_closing_paren = $rhash_IN->{i_closing_paren};
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $has_broken_sublist = $rhash_IN->{has_broken_sublist};
my $interrupted = $rhash_IN->{interrupted};
#-----------------------------------------
# Section A: Handle some special cases ...
#-----------------------------------------
#-------------------------------------------------------------
# Special Case A1: Compound List Rule 1:
# Break at (almost) every comma for a list containing a broken
# sublist. This has higher priority than the Interrupted List
# Rule.
#-------------------------------------------------------------
if ($has_broken_sublist) {
$self->apply_broken_sublist_rule( $rhash_A, $interrupted );
return;
}
#--------------------------------------------------------------
# Special Case A2: Interrupted List Rule:
# A list is forced to use old breakpoints if it was interrupted
# by side comments or blank lines, or requested by user.
#--------------------------------------------------------------
if ( $rOpts_break_at_old_comma_breakpoints
|| $interrupted
|| $i_opening_paren < 0 )
{
my $i_first_comma = $rhash_A->{_i_first_comma};
my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
$self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
return;
}
#-----------------------------------------------------------------
# Special Case A3: If it fits on one line, return and let the line
# break logic decide if and where to break.
#-----------------------------------------------------------------
# The -bbxi=2 parameters can add an extra hidden level of indentation
# so they need a tolerance to avoid instability. Fixes b1259, 1260.
my $opening_token = $tokens_to_go[$i_opening_paren];
my $tol = 0;
if ( $break_before_container_types{$opening_token}
&& $container_indentation_options{$opening_token}
&& $container_indentation_options{$opening_token} == 2 )
{
$tol = $rOpts_indent_columns;
# use greater of -ci and -i (fix for case b1334)
if ( $tol < $rOpts_continuation_indentation ) {
$tol = $rOpts_continuation_indentation;
}
}
# Increase tol when -atc and -dtc are both used to allow for
# possible loss in length on next pass due to a comma. Fixes b1455.
if ( $rOpts_delete_trailing_commas && $rOpts_add_trailing_commas ) {
$tol += 1;
}
my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
my $excess =
$self->excess_line_length( $i_opening_minus, $i_closing_paren );
return if ( $excess + $tol <= 0 );
#---------------------------------------
# Section B: Handle a multiline list ...
#---------------------------------------
$self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus );
return;
} ## end sub table_maker
sub apply_broken_sublist_rule {
my ( $self, $rhash_A, $interrupted ) = @_;
# Break at (almost) every comma for a list containing a broken
# sublist.
my $ritem_lengths = $rhash_A->{_ritem_lengths};
my $ri_term_begin = $rhash_A->{_ri_term_begin};
my $ri_term_end = $rhash_A->{_ri_term_end};
my $ri_term_comma = $rhash_A->{_ri_term_comma};
my $item_count = $rhash_A->{_item_count_A};
my $i_first_comma = $rhash_A->{_i_first_comma};
my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
# Break at every comma except for a comma between two
# simple, small terms. This prevents long vertical
# columns of, say, just 0's.
my $small_length = 10; # 2 + actual maximum length wanted
# We'll insert a break in long runs of small terms to
# allow alignment in uniform tables.
my $skipped_count = 0;
my $columns = table_columns_available($i_first_comma);
my $fields = int( $columns / $small_length );
if ( $rOpts_maximum_fields_per_table
&& $fields > $rOpts_maximum_fields_per_table )
{
$fields = $rOpts_maximum_fields_per_table;
}
my $max_skipped_count = $fields - 1;
my $is_simple_last_term = 0;
my $is_simple_next_term = 0;
foreach my $j ( 0 .. $item_count ) {
$is_simple_last_term = $is_simple_next_term;
$is_simple_next_term = 0;
if ( $j < $item_count
&& $ri_term_end->[$j] == $ri_term_begin->[$j]
&& $ritem_lengths->[$j] <= $small_length )
{
$is_simple_next_term = 1;
}
next if $j == 0;
if ( $is_simple_last_term
&& $is_simple_next_term
&& $skipped_count < $max_skipped_count )
{
$skipped_count++;
}
else {
$skipped_count = 0;
my $i_tc = $ri_term_comma->[ $j - 1 ];
last unless defined $i_tc;
$self->set_forced_breakpoint($i_tc);
}
}
# always break at the last comma if this list is
# interrupted; we wouldn't want to leave a terminal '{', for
# example.
if ($interrupted) {
$self->set_forced_breakpoint($i_true_last_comma);
}
return;
} ## end sub apply_broken_sublist_rule
sub set_emergency_comma_breakpoints {
my (
$self, #
$number_of_fields_best,
$rhash_IN,
$comma_count,
$i_first_comma,
) = @_;
# The computed number of table fields is negative, so we have to make
# an emergency fix.
my $rcomma_index = $rhash_IN->{rcomma_index};
my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
my $must_break_open = $rhash_IN->{must_break_open};
# are we an item contained in an outer list?
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
# In many cases, it may be best to not force a break if there is just
# one comma, because the standard continuation break logic will do a
# better job without it.
# In the common case that all but one of the terms can fit
# on a single line, it may look better not to break open the
# containing parens. Consider, for example
# $color =
# join ( '/',
# sort { $color_value{$::a} <=> $color_value{$::b}; }
# keys %colors );
# which will look like this with the container broken:
# $color = join (
# '/',
# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
# );
# Here is an example of this rule for a long last term:
# log_message( 0, 256, 128,
# "Number of routes in adj-RIB-in to be considered: $peercount" );
# And here is an example with a long first term:
# $s = sprintf(
# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
# $r, $pu, $ps, $cu, $cs, $tt
# )
# if $style eq 'all';
my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
my $long_first_term =
$self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
0;
# break at every comma ...
if (
# if requested by user or is best looking
$number_of_fields_best == 1
# or if this is a sublist of a larger list
|| $in_hierarchical_list
# or if multiple commas and we don't have a long first or last
# term
|| ( $comma_count > 1
&& !( $long_last_term || $long_first_term ) )
)
{
foreach ( 0 .. $comma_count - 1 ) {
$self->set_forced_breakpoint( $rcomma_index->[$_] );
}
}
elsif ($long_last_term) {
$self->set_forced_breakpoint($i_last_comma);
${$rdo_not_break_apart} = 1 unless $must_break_open;
}
elsif ($long_first_term) {
$self->set_forced_breakpoint($i_first_comma);
}
else {
# let breaks be defined by default bond strength logic
}
return;
} ## end sub set_emergency_comma_breakpoints
sub break_multiline_list {
my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_;
# We have a list spanning multiple lines and are trying
# to decide the best way to set comma breakpoints.
# Overriden variables
my $item_count = $rhash_A->{_item_count_A};
my $identifier_count = $rhash_A->{_identifier_count_A};
# Derived variables:
my $ritem_lengths = $rhash_A->{_ritem_lengths};
my $ri_term_begin = $rhash_A->{_ri_term_begin};
my $ri_term_end = $rhash_A->{_ri_term_end};
my $ri_term_comma = $rhash_A->{_ri_term_comma};
my $rmax_length = $rhash_A->{_rmax_length};
my $comma_count = $rhash_A->{_comma_count};
my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
my $first_term_length = $rhash_A->{_first_term_length};
my $i_first_comma = $rhash_A->{_i_first_comma};
my $i_last_comma = $rhash_A->{_i_last_comma};
my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
# Variables received from caller
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $i_closing_paren = $rhash_IN->{i_closing_paren};
my $rcomma_index = $rhash_IN->{rcomma_index};
my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
my $list_type = $rhash_IN->{list_type};
my $interrupted = $rhash_IN->{interrupted};
my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
my $must_break_open = $rhash_IN->{must_break_open};
## NOTE: these input vars from caller use the values from rhash_A (see above):
## my $item_count = $rhash_IN->{item_count};
## my $identifier_count = $rhash_IN->{identifier_count};
# NOTE: i_opening_paren changes value below so we need to get these here
my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
my $opening_token = $tokens_to_go[$i_opening_paren];
#---------------------------------------------------------------
# Section B1: Determine '$number_of_fields' = the best number of
# fields to use if this is to be formatted as a table.
#---------------------------------------------------------------
# Now we know that this block spans multiple lines; we have to set
# at least one breakpoint -- real or fake -- as a signal to break
# open any outer containers.
set_fake_breakpoint();
# Set a flag indicating if we need to break open to keep -lp
# items aligned. This is necessary if any of the list terms
# exceeds the available space after the '('.
my $need_lp_break_open = $must_break_open;
my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
if ( $is_lp_formatting && !$must_break_open ) {
my $columns_if_unbroken =
$maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
- total_line_length( $i_opening_minus, $i_opening_paren );
$need_lp_break_open =
( $rmax_length->[0] > $columns_if_unbroken )
|| ( $rmax_length->[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
my $hash_B =
$self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting );
return if ( !defined($hash_B) );
# Updated variables
$i_first_comma = $hash_B->{_i_first_comma_B};
$i_opening_paren = $hash_B->{_i_opening_paren_B};
$item_count = $hash_B->{_item_count_B};
# New variables
my $columns = $hash_B->{_columns};
my $formatted_columns = $hash_B->{_formatted_columns};
my $formatted_lines = $hash_B->{_formatted_lines};
my $max_width = $hash_B->{_max_width};
my $new_identifier_count = $hash_B->{_new_identifier_count};
my $number_of_fields = $hash_B->{_number_of_fields};
my $odd_or_even = $hash_B->{_odd_or_even};
my $packed_columns = $hash_B->{_packed_columns};
my $packed_lines = $hash_B->{_packed_lines};
my $pair_width = $hash_B->{_pair_width};
my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list};
my $use_separate_first_term = $hash_B->{_use_separate_first_term};
# are we an item contained in an outer list?
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
my $unused_columns = $formatted_columns - $packed_columns;
# set some empirical parameters to help decide if we should try to
# align; high sparsity does not look good, especially with few lines
my $sparsity = ($unused_columns) / ($formatted_columns);
my $max_allowed_sparsity =
( $item_count < 3 ) ? 0.1
: ( $packed_lines == 1 ) ? 0.15
: ( $packed_lines == 2 ) ? 0.4
: 0.7;
my $two_line_word_wrap_ok;
if ( $opening_token eq '(' ) {
# default is to allow wrapping of short paren lists
$two_line_word_wrap_ok = 1;
# but turn off word wrap where requested
if ($rOpts_break_open_compact_parens) {
# This parameter is a one-character flag, as follows:
# '0' matches no parens -> break open NOT OK -> word wrap OK
# '1' matches all parens -> break open OK -> word wrap NOT OK
# Other values are the same as used by the weld-exclusion-list
my $flag = $rOpts_break_open_compact_parens;
if ( $flag eq '*'
|| $flag eq '1' )
{
$two_line_word_wrap_ok = 0;
}
elsif ( $flag eq '0' ) {
$two_line_word_wrap_ok = 1;
}
else {
my $seqno = $type_sequence_to_go[$i_opening_paren];
$two_line_word_wrap_ok =
!$self->match_paren_control_flag( $seqno, $flag );
}
}
}
#-------------------------------------------------------------------
# Section B2: Check for shortcut methods, which avoid treating
# a list as a table for relatively small parenthesized lists. These
# are usually easier to read if not formatted as tables.
#-------------------------------------------------------------------
if (
$packed_lines <= 2 # probably can fit in 2 lines
&& $item_count < 9 # doesn't have too many items
&& $opening_is_in_block # not a sub-container
&& $two_line_word_wrap_ok # ok to wrap this paren list
)
{
# Section B2A: Shortcut method 1: for -lp and just one comma:
# This is a no-brainer, just break at the comma.
if (
$is_lp_formatting # -lp
&& $item_count == 2 # two items, one comma
&& !$must_break_open
)
{
my $i_break = $rcomma_index->[0];
$self->set_forced_breakpoint($i_break);
${$rdo_not_break_apart} = 1;
return;
}
# Section B2B: Shortcut method 2 is for most small ragged lists
# which might look best if not displayed as a table.
if (
( $number_of_fields == 2 && $item_count == 3 )
|| (
$new_identifier_count > 0 # isn't all quotes
&& $sparsity > 0.15
) # would be fairly spaced gaps if aligned
)
{
my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
# NOTE: we should really use the true break count here,
# which can be greater if there are large terms and
# little space, but usually this will work well enough.
if ( !$must_break_open ) {
if ( $break_count <= 1
|| ( $is_lp_formatting && !$need_lp_break_open ) )
{
${$rdo_not_break_apart} = 1;
}
}
return;
}
} ## end shortcut methods
# debug stuff
DEBUG_SPARSE && do {
# How many spaces across the page will we fill?
my $columns_per_line =
( int $number_of_fields / 2 ) * $pair_width +
( $number_of_fields % 2 ) * $max_width;
print {*STDOUT}
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
};
#------------------------------------------------------------------
# Section B3: Compound List Rule 2:
# If this list is too long for one line, and it is an item of a
# larger list, then we must format it, regardless of sparsity
# (ian.t). One reason that we have to do this is to trigger
# Compound List Rule 1, above, which causes breaks at all commas of
# all outer lists. In this way, the structure will be properly
# displayed.
#------------------------------------------------------------------
# Decide if this list is too long for one line unless broken
my $total_columns = table_columns_available($i_opening_paren);
my $too_long = $packed_columns > $total_columns;
# For a paren list, include the length of the token just before the
# '(' because this is likely a sub call, and we would have to
# include the sub name on the same line as the list. This is still
# imprecise, but not too bad. (steve.t)
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
$too_long = $self->excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
# TODO: For an item after a '=>', try to include the length of the
# thing before the '=>'. This is crude and should be improved by
# actually looking back token by token.
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
my $i_opening_minus_test = $i_opening_paren - 4;
if ( $i_opening_minus >= 0 ) {
$too_long = $self->excess_line_length( $i_opening_minus_test,
$i_effective_last_comma + 1 ) > 0;
}
}
# Always break lists contained in '[' and '{' if too long for 1 line,
# and always break lists which are too long and part of a more complex
# structure.
my $must_break_open_container = $must_break_open
|| ( $too_long
&& ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
#--------------------------------------------------------------------
# Section B4: A table will work here. But do not attempt to align
# columns if this is a tiny table or it would be too spaced. It
# seems that the more packed lines we have, the sparser the list that
# can be allowed and still look ok.
#--------------------------------------------------------------------
if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
|| ( $formatted_lines < 2 )
|| ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
)
{
#----------------------------------------------------------------
# Section B4A: too sparse: would not look good aligned in a table
#----------------------------------------------------------------
# use old breakpoints if this is a 'big' list
if ( $packed_lines > 2 && $item_count > 10 ) {
write_logfile_entry("List sparse: using old breakpoints\n");
$self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
}
# let the continuation logic handle it if 2 lines
else {
my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
if ( !$must_break_open_container ) {
if ( $break_count <= 1
|| ( $is_lp_formatting && !$need_lp_break_open ) )
{
${$rdo_not_break_apart} = 1;
}
}
}
return;
}
#--------------------------------------------
# Section B4B: Go ahead and format as a table
#--------------------------------------------
$self->write_formatted_table( $number_of_fields, $comma_count,
$rcomma_index, $use_separate_first_term );
return;
} ## end sub break_multiline_list
sub table_layout_A {
my ($rhash_IN) = @_;
# Find lengths of all list items needed to calculate page layout
# Returns:
# - nothing if this list is empty, or
# - a ref to a hash containing some derived parameters
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $i_closing_paren = $rhash_IN->{i_closing_paren};
my $identifier_count = $rhash_IN->{identifier_count};
my $rcomma_index = $rhash_IN->{rcomma_index};
my $item_count = $rhash_IN->{item_count};
# nothing to do if no commas seen
return if ( $item_count < 1 );
my $i_first_comma = $rcomma_index->[0];
my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
my $i_last_comma = $i_true_last_comma;
if ( $i_last_comma >= $max_index_to_go ) {
$item_count -= 1;
return if ( $item_count < 1 );
$i_last_comma = $rcomma_index->[ $item_count - 1 ];
}
my $comma_count = $item_count;
my $ritem_lengths = [];
my $ri_term_begin = [];
my $ri_term_end = [];
my $ri_term_comma = [];
my $rmax_length = [ 0, 0 ];
my $i_prev_plus;
my $first_term_length;
my $i = $i_opening_paren;
my $is_odd = 1;
foreach my $j ( 0 .. $comma_count - 1 ) {
$is_odd = 1 - $is_odd;
$i_prev_plus = $i + 1;
$i = $rcomma_index->[$j];
my $i_term_end =
( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' )
? $i - 2
: $i - 1;
my $i_term_begin =
( $types_to_go[$i_prev_plus] eq 'b' )
? $i_prev_plus + 1
: $i_prev_plus;
push @{$ri_term_begin}, $i_term_begin;
push @{$ri_term_end}, $i_term_end;
push @{$ri_term_comma}, $i;
# note: currently adding 2 to all lengths (for comma and space)
my $length =
2 + token_sequence_length( $i_term_begin, $i_term_end );
push @{$ritem_lengths}, $length;
if ( $j == 0 ) {
$first_term_length = $length;
}
else {
if ( $length > $rmax_length->[$is_odd] ) {
$rmax_length->[$is_odd] = $length;
}
}
}
# now we have to make a distinction between the comma count and item
# count, because the item count will be one greater than the comma
# count if the last item is not terminated with a comma
my $i_b =
( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
? $i_last_comma + 1
: $i_last_comma;
my $i_e =
( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
? $i_closing_paren - 2
: $i_closing_paren - 1;
my $i_effective_last_comma = $i_last_comma;
my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
if ( $last_item_length > 0 ) {
# add 2 to length because other lengths include a comma and a blank
$last_item_length += 2;
push @{$ritem_lengths}, $last_item_length;
push @{$ri_term_begin}, $i_b + 1;
push @{$ri_term_end}, $i_e;
push @{$ri_term_comma}, undef;
my $i_odd = $item_count % 2;
if ( $last_item_length > $rmax_length->[$i_odd] ) {
$rmax_length->[$i_odd] = $last_item_length;
}
$item_count++;
$i_effective_last_comma = $i_e + 1;
if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
$identifier_count++;
}
}
# be sure we do not extend beyond the current list length
if ( $i_effective_last_comma >= $max_index_to_go ) {
$i_effective_last_comma = $max_index_to_go - 1;
}
# Return the hash of derived variables.
return {
# Updated variables
_item_count_A => $item_count,
_identifier_count_A => $identifier_count,
# New variables
_ritem_lengths => $ritem_lengths,
_ri_term_begin => $ri_term_begin,
_ri_term_end => $ri_term_end,
_ri_term_comma => $ri_term_comma,
_rmax_length => $rmax_length,
_comma_count => $comma_count,
_i_effective_last_comma => $i_effective_last_comma,
_first_term_length => $first_term_length,
_i_first_comma => $i_first_comma,
_i_last_comma => $i_last_comma,
_i_true_last_comma => $i_true_last_comma,
};
} ## end sub table_layout_A
sub table_layout_B {
my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_;
# Determine variables for the best table layout, including
# the best number of fields.
# Returns:
# - nothing if nothing more to do
# - a ref to a hash containg some derived parameters
# Variables from caller
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $list_type = $rhash_IN->{list_type};
my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
my $rcomma_index = $rhash_IN->{rcomma_index};
my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
# Table size variables
my $comma_count = $rhash_A->{_comma_count};
my $first_term_length = $rhash_A->{_first_term_length};
my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
my $i_first_comma = $rhash_A->{_i_first_comma};
my $identifier_count = $rhash_A->{_identifier_count_A};
my $item_count = $rhash_A->{_item_count_A};
my $ri_term_begin = $rhash_A->{_ri_term_begin};
my $ri_term_comma = $rhash_A->{_ri_term_comma};
my $ri_term_end = $rhash_A->{_ri_term_end};
my $ritem_lengths = $rhash_A->{_ritem_lengths};
my $rmax_length = $rhash_A->{_rmax_length};
# Specify if the list must have an even number of fields or not.
# It is generally safest to assume an even number, because the
# list items might be a hash list. But if we can be sure that
# it is not a hash, then we can allow an odd number for more
# flexibility.
# 1 = odd field count ok, 2 = want even count
my $odd_or_even = 2;
if (
$identifier_count >= $item_count - 1
|| $is_assignment{$next_nonblank_type}
|| ( $list_type
&& $list_type ne '=>'
&& $list_type !~ /^[\:\?]$/ )
)
{
$odd_or_even = 1;
}
# do we have a long first term which should be
# left on a line by itself?
my $use_separate_first_term = (
$odd_or_even == 1 # only if we can use 1 field/line
&& $item_count > 3 # need several items
&& $first_term_length >
2 * $rmax_length->[0] - 2 # need long first term
&& $first_term_length >
2 * $rmax_length->[1] - 2 # need long first term
);
# or do we know from the type of list that the first term should
# be placed alone?
if ( !$use_separate_first_term ) {
if ( $is_keyword_with_special_leading_term{$list_type} ) {
$use_separate_first_term = 1;
# should the container be broken open?
if ( $item_count < 3 ) {
if ( $i_first_comma - $i_opening_paren < 4 ) {
${$rdo_not_break_apart} = 1;
}
}
elsif ($first_term_length < 20
&& $i_first_comma - $i_opening_paren < 4 )
{
my $columns = table_columns_available($i_first_comma);
if ( $first_term_length < $columns ) {
${$rdo_not_break_apart} = 1;
}
}
else {
## ok
}
}
}
# if so,
if ($use_separate_first_term) {
# ..set a break and update starting values
$self->set_forced_breakpoint($i_first_comma);
$item_count--;
#---------------------------------------------------------------
# Section B1A: Stop if one item remains ($i_first_comma = undef)
#---------------------------------------------------------------
# Fix for b1442: use '$item_count' here instead of '$comma_count'
# to make the result independent of any trailing comma.
return if ( $item_count <= 1 );
$i_opening_paren = $i_first_comma;
$i_first_comma = $rcomma_index->[1];
shift @{$ritem_lengths};
shift @{$ri_term_begin};
shift @{$ri_term_end};
shift @{$ri_term_comma};
}
# if not, update the metrics to include the first term
else {
if ( $first_term_length > $rmax_length->[0] ) {
$rmax_length->[0] = $first_term_length;
}
}
# Field width parameters
my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
my $max_width =
( $rmax_length->[0] > $rmax_length->[1] )
? $rmax_length->[0]
: $rmax_length->[1];
# Number of free columns across the page width for laying out tables
my $columns = table_columns_available($i_first_comma);
# Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
# to break after an opening paren, then the maximum line length for the
# first line could be less than the later lines. So we need to reduce
# the line length. Normally, we will get a break after an opening
# paren, but in some cases we might not.
if ( $rOpts_variable_maximum_line_length
&& $tokens_to_go[$i_opening_paren] eq '('
&& @{$ri_term_begin} )
{
my $ib = $ri_term_begin->[0];
my $type = $types_to_go[$ib];
# So far, the only known instance of this problem is when
# a bareword follows an opening paren with -vmll
if ( $type eq 'w' ) {
# If a line starts with paren+space+terms, then its max length
# could be up to ci+2-i spaces less than if the term went out
# on a line after the paren. So..
my $tol_w = max( 0,
2 + $rOpts_continuation_indentation -
$rOpts_indent_columns );
$columns = max( 0, $columns - $tol_w );
## Here is the original b1210 fix, but it failed on b1216-b1218
##my $columns2 = table_columns_available($i_opening_paren);
##$columns = min( $columns, $columns2 );
}
}
# Estimated maximum number of fields which fit this space.
# This will be our first guess:
my $number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even, $max_width,
$pair_width );
my $number_of_fields = $number_of_fields_max;
# Find the best-looking number of fields.
# This will be our second guess, if possible.
my ( $number_of_fields_best, $ri_ragged_break_list,
$new_identifier_count )
= $self->study_list_complexity( $ri_term_begin, $ri_term_end,
$ritem_lengths, $max_width );
if ( $number_of_fields_best != 0
&& $number_of_fields_best < $number_of_fields_max )
{
$number_of_fields = $number_of_fields_best;
}
# fix b1427
elsif ($number_of_fields_best > 1
&& $number_of_fields_best > $number_of_fields_max )
{
$number_of_fields_best = $number_of_fields_max;
}
else {
## ok
}
# If we are crowded and the -lp option is being used, try
# to undo some indentation
if (
$is_lp_formatting
&& (
$number_of_fields == 0
|| ( $number_of_fields == 1
&& $number_of_fields != $number_of_fields_best )
)
)
{
( $number_of_fields, $number_of_fields_best, $columns ) =
$self->lp_table_fix(
$columns,
$i_first_comma,
$max_width,
$number_of_fields,
$number_of_fields_best,
$odd_or_even,
$pair_width,
$ritem_lengths,
);
}
# try for one column if two won't work
if ( $number_of_fields <= 0 ) {
$number_of_fields = int( $columns / $max_width );
}
# The user can place an upper bound on the number of fields,
# which can be useful for doing maintenance on tables
if ( $rOpts_maximum_fields_per_table
&& $number_of_fields > $rOpts_maximum_fields_per_table )
{
$number_of_fields = $rOpts_maximum_fields_per_table;
}
# How many columns (characters) and lines would this container take
# if no additional whitespace were added?
my $packed_columns = token_sequence_length( $i_opening_paren + 1,
$i_effective_last_comma + 1 );
if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
my $packed_lines = 1 + int( $packed_columns / $columns );
#-----------------------------------------------------------------
# Section B1B: Stop here if we did not compute a positive number of
# fields. In this case we just have to bail out.
#-----------------------------------------------------------------
if ( $number_of_fields <= 0 ) {
$self->set_emergency_comma_breakpoints(
$number_of_fields_best,
$rhash_IN,
$comma_count,
$i_first_comma,
);
return;
}
#------------------------------------------------------------------
# Section B1B: We have a tentative field count that seems to work.
# Now we must look more closely to determine if a table layout will
# actually look okay.
#------------------------------------------------------------------
# How many lines will this require?
my $formatted_lines = $item_count / ($number_of_fields);
if ( $formatted_lines != int $formatted_lines ) {
$formatted_lines = 1 + int $formatted_lines;
}
# So far we've been trying to fill out to the right margin. But
# compact tables are easier to read, so let's see if we can use fewer
# fields without increasing the number of lines.
$number_of_fields = compactify_table( $item_count, $number_of_fields,
$formatted_lines, $odd_or_even );
my $formatted_columns;
if ( $number_of_fields > 1 ) {
$formatted_columns =
( $pair_width * ( int( $item_count / 2 ) ) +
( $item_count % 2 ) * $max_width );
}
else {
$formatted_columns = $max_width * $item_count;
}
if ( $formatted_columns < $packed_columns ) {
$formatted_columns = $packed_columns;
}
# Construce hash_B:
return {
# Updated variables
_i_first_comma_B => $i_first_comma,
_i_opening_paren_B => $i_opening_paren,
_item_count_B => $item_count,
# New variables
_columns => $columns,
_formatted_columns => $formatted_columns,
_formatted_lines => $formatted_lines,
_max_width => $max_width,
_new_identifier_count => $new_identifier_count,
_number_of_fields => $number_of_fields,
_odd_or_even => $odd_or_even,
_packed_columns => $packed_columns,
_packed_lines => $packed_lines,
_pair_width => $pair_width,
_ri_ragged_break_list => $ri_ragged_break_list,
_use_separate_first_term => $use_separate_first_term,
};
} ## end sub table_layout_B
sub lp_table_fix {
# try to undo some -lp indentation to improve table formatting
my (
$self, #
$columns,
$i_first_comma,
$max_width,
$number_of_fields,
$number_of_fields_best,
$odd_or_even,
$pair_width,
$ritem_lengths,
) = @_;
my $available_spaces =
$self->get_available_spaces_to_go($i_first_comma);
if ( $available_spaces > 0 ) {
my $spaces_wanted = $max_width - $columns; # for 1 field
if ( $number_of_fields_best == 0 ) {
$number_of_fields_best =
get_maximum_fields_wanted($ritem_lengths);
}
if ( $number_of_fields_best != 1 ) {
my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
if ( $available_spaces > $spaces_wanted_2 ) {
$spaces_wanted = $spaces_wanted_2;
}
}
if ( $spaces_wanted > 0 ) {
my $deleted_spaces =
$self->reduce_lp_indentation( $i_first_comma,
$spaces_wanted );
# redo the math
if ( $deleted_spaces > 0 ) {
$columns = table_columns_available($i_first_comma);
$number_of_fields =
maximum_number_of_fields( $columns, $odd_or_even,
$max_width, $pair_width );
if ( $number_of_fields_best == 1
&& $number_of_fields >= 1 )
{
$number_of_fields = $number_of_fields_best;
}
}
}
}
return ( $number_of_fields, $number_of_fields_best, $columns );
} ## end sub lp_table_fix
sub write_formatted_table {
# Write a table of comma separated items with fixed number of fields
my ( $self, $number_of_fields, $comma_count, $rcomma_index,
$use_separate_first_term )
= @_;
write_logfile_entry(
"List: auto formatting with $number_of_fields fields/row\n");
my $j_first_break =
$use_separate_first_term
? $number_of_fields
: $number_of_fields - 1;
my $j = $j_first_break;
while ( $j < $comma_count ) {
my $i_comma = $rcomma_index->[$j];
$self->set_forced_breakpoint($i_comma);
$j += $number_of_fields;
}
return;
} ## end sub write_formatted_table
} ## end closure set_comma_breakpoint_final
sub study_list_complexity {
# Look for complex tables which should be formatted with one term per line.
# Returns the following:
#
# \@i_ragged_break_list = list of good breakpoints to avoid lines
# which are hard to read
# $number_of_fields_best = suggested number of fields based on
# complexity; = 0 if any number may be used.
#
my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
my $item_count = @{$ri_term_begin};
my $complex_item_count = 0;
my $number_of_fields_best = $rOpts_maximum_fields_per_table;
my $i_max = @{$ritem_lengths} - 1;
##my @item_complexity;
my $i_last_last_break = -3;
my $i_last_break = -2;
my @i_ragged_break_list;
my $definitely_complex = 30;
my $definitely_simple = 12;
my $quote_count = 0;
for my $i ( 0 .. $i_max ) {
my $ib = $ri_term_begin->[$i];
my $ie = $ri_term_end->[$i];
# define complexity: start with the actual term length
my $weighted_length = ( $ritem_lengths->[$i] - 2 );
##TBD: join types here and check for variations
##my $str=join "", @tokens_to_go[$ib..$ie];
my $is_quote = 0;
if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
$is_quote = 1;
$quote_count++;
}
elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
$quote_count++;
}
else {
## ok
}
if ( $ib eq $ie ) {
if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
$complex_item_count++;
$weighted_length *= 2;
}
else {
}
}
else {
if ( first { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
$complex_item_count++;
$weighted_length *= 2;
}
if ( first { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
$weighted_length += 4;
}
}
# add weight for extra tokens.
$weighted_length += 2 * ( $ie - $ib );
## my $BUB = join '', @tokens_to_go[$ib..$ie];
## print "# COMPLEXITY:$weighted_length $BUB\n";
##push @item_complexity, $weighted_length;
# now mark a ragged break after this item it if it is 'long and
# complex':
if ( $weighted_length >= $definitely_complex ) {
# if we broke after the previous term
# then break before it too
if ( $i_last_break == $i - 1
&& $i > 1
&& $i_last_last_break != $i - 2 )
{
## TODO: don't strand a small term
pop @i_ragged_break_list;
push @i_ragged_break_list, $i - 2;
push @i_ragged_break_list, $i - 1;
}
push @i_ragged_break_list, $i;
$i_last_last_break = $i_last_break;
$i_last_break = $i;
}
# don't break before a small last term -- it will
# not look good on a line by itself.
elsif ($i == $i_max
&& $i_last_break == $i - 1
&& $weighted_length <= $definitely_simple )
{
pop @i_ragged_break_list;
}
else {
## ok
}
}
my $identifier_count = $i_max + 1 - $quote_count;
# Need more tuning here..
if ( $max_width > 12
&& $complex_item_count > $item_count / 2
&& $number_of_fields_best != 2 )
{
$number_of_fields_best = 1;
}
return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
} ## end sub study_list_complexity
sub get_maximum_fields_wanted {
# Not all tables look good with more than one field of items.
# This routine looks at a table and decides if it should be
# formatted with just one field or not.
# This coding is still under development.
my ($ritem_lengths) = @_;
my $number_of_fields_best = 0;
# For just a few items, we tentatively assume just 1 field.
my $item_count = @{$ritem_lengths};
if ( $item_count <= 5 ) {
$number_of_fields_best = 1;
}
# For larger tables, look at it both ways and see what looks best
else {
my $is_odd = 1;
my @max_length = ( 0, 0 );
my @last_length_2 = ( undef, undef );
my @first_length_2 = ( undef, undef );
my $last_length = undef;
my $total_variation_1 = 0;
my $total_variation_2 = 0;
my @total_variation_2 = ( 0, 0 );
foreach my $j ( 0 .. $item_count - 1 ) {
$is_odd = 1 - $is_odd;
my $length = $ritem_lengths->[$j];
if ( $length > $max_length[$is_odd] ) {
$max_length[$is_odd] = $length;
}
if ( defined($last_length) ) {
my $dl = abs( $length - $last_length );
$total_variation_1 += $dl;
}
$last_length = $length;
my $ll = $last_length_2[$is_odd];
if ( defined($ll) ) {
my $dl = abs( $length - $ll );
$total_variation_2[$is_odd] += $dl;
}
else {
$first_length_2[$is_odd] = $length;
}
$last_length_2[$is_odd] = $length;
}
$total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
if ( $total_variation_2 >= $factor * $total_variation_1 ) {
$number_of_fields_best = 1;
}
}
return ($number_of_fields_best);
} ## end sub get_maximum_fields_wanted
sub table_columns_available {
my $i_first_comma = shift;
my $columns =
$maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
leading_spaces_to_go($i_first_comma);
# Patch: the vertical formatter does not line up lines whose lengths
# exactly equal the available line length because of allowances
# that must be made for side comments. Therefore, the number of
# available columns is reduced by 1 character.
$columns -= 1;
return $columns;
} ## end sub table_columns_available
sub maximum_number_of_fields {
# how many fields will fit in the available space?
my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
my $max_pairs = int( $columns / $pair_width );
my $number_of_fields = $max_pairs * 2;
if ( $odd_or_even == 1
&& $max_pairs * $pair_width + $max_width <= $columns )
{
$number_of_fields++;
}
return $number_of_fields;
} ## end sub maximum_number_of_fields
sub compactify_table {
# given a table with a certain number of fields and a certain number
# of lines, see if reducing the number of fields will make it look
# better.
my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
my $min_fields = $number_of_fields;
while ($min_fields >= $odd_or_even
&& $min_fields * $formatted_lines >= $item_count )
{
$number_of_fields = $min_fields;
$min_fields -= $odd_or_even;
}
}
return $number_of_fields;
} ## end sub compactify_table
sub set_ragged_breakpoints {
# Set breakpoints in a list that cannot be formatted nicely as a
# table.
my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
my $break_count = 0;
foreach ( @{$ri_ragged_break_list} ) {
my $j = $ri_term_comma->[$_];
if ($j) {
$self->set_forced_breakpoint($j);
$break_count++;
}
}
return $break_count;
} ## end sub set_ragged_breakpoints
sub copy_old_breakpoints {
my ( $self, $i_first_comma, $i_last_comma ) = @_;
# We are formatting a list and have decided to make comma breaks
# the same as in the input file.
for my $i ( $i_first_comma .. $i_last_comma ) {
if ( $old_breakpoint_to_go[$i] ) {
# If the comma style is under certain controls, and if this is a
# comma breakpoint with the comma is at the beginning of the next
# line, then we must pass that index instead. This will allow sub
# set_forced_breakpoints to check and follow the user settings. This
# produces a uniform style and can prevent instability (b1422).
#
# The flag '$controlled_comma_style' will be set if the user
# entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
# needed or set for the -boc flag.
my $ibreak = $i;
if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) {
my $index = $inext_to_go[$ibreak];
if ( $index > $ibreak && $types_to_go[$index] eq ',' ) {
$ibreak = $index;
}
}
$self->set_forced_breakpoint($ibreak);
}
}
return;
} ## end sub copy_old_breakpoints
sub set_nobreaks {
my ( $self, $i, $j ) = @_;
if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
0 && do {
my ( $a, $b, $c ) = caller();
print {*STDOUT}
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
};
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
}
# shouldn't happen; non-critical error
else {
if (DEVEL_MODE) {
my ( $a, $b, $c ) = caller();
Fault(<<EOM);
NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go
EOM
}
}
return;
} ## end sub set_nobreaks
###############################################
# CODE SECTION 12: Code for setting indentation
###############################################
sub token_sequence_length {
# return length of tokens ($ibeg .. $iend) including $ibeg & $iend
my ( $ibeg, $iend ) = @_;
# fix possible negative starting index
if ( $ibeg < 0 ) { $ibeg = 0 }
# returns 0 if index range is empty (some subs assume this)
if ( $ibeg > $iend ) {
return 0;
}
return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
} ## end sub token_sequence_length
sub total_line_length {
# return length of a line of tokens ($ibeg .. $iend)
my ( $ibeg, $iend ) = @_;
# get the leading spaces on this line ...
my $spaces = $leading_spaces_to_go[$ibeg];
if ( ref($spaces) ) { $spaces = $spaces->get_spaces() }
# ... then add the net token length
return $spaces + $summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg];
} ## end sub total_line_length
sub excess_line_length {
# return number of characters by which a line of tokens ($ibeg..$iend)
# exceeds the allowable line length.
# NOTE: profiling shows that efficiency of this routine is essential.
my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
# Start with the leading spaces on this line ...
my $excess = $leading_spaces_to_go[$ibeg];
if ( ref($excess) ) { $excess = $excess->get_spaces() }
# ... and include right weld lengths unless requested not to
if ( $total_weld_count
&& $type_sequence_to_go[$iend]
&& !$ignore_right_weld )
{
my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
$excess += $wr if defined($wr);
}
# ... then add the net token length, minus the maximum length
return $excess +
$summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg] -
$maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
} ## end sub excess_line_length
sub get_spaces {
# return the number of leading spaces associated with an indentation
# variable $indentation is either a constant number of spaces or an object
# with a get_spaces method.
my $indentation = shift;
return ref($indentation) ? $indentation->get_spaces() : $indentation;
} ## end sub get_spaces
sub get_recoverable_spaces {
# return the number of spaces (+ means shift right, - means shift left)
# that we would like to shift a group of lines with the same indentation
# to get them to line up with their opening parens
my $indentation = shift;
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
} ## end sub get_recoverable_spaces
sub get_available_spaces_to_go {
my ( $self, $ii ) = @_;
my $item = $leading_spaces_to_go[$ii];
# return the number of available leading spaces associated with an
# indentation variable. $indentation is either a constant number of
# spaces or an object with a get_available_spaces method.
return ref($item) ? $item->get_available_spaces() : 0;
} ## end sub get_available_spaces_to_go
{ ## begin closure set_lp_indentation
use constant DEBUG_LP => 0;
# Stack of -lp index objects which survives between batches.
my $rLP;
my $max_lp_stack;
# The predicted position of the next opening container which may start
# an -lp indentation level. This survives between batches.
my $lp_position_predictor;
BEGIN {
# Index names for the -lp stack variables.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_lp_ci_level_ => $i++,
_lp_level_ => $i++,
_lp_object_ => $i++,
_lp_container_seqno_ => $i++,
_lp_space_count_ => $i++,
};
} ## end BEGIN
sub initialize_lp_vars {
# initialize gnu variables for a new file;
# must be called once at the start of a new file.
$lp_position_predictor = 0;
$max_lp_stack = 0;
# we can turn off -lp if all levels will be at or above the cutoff
if ( $high_stress_level <= 1 ) {
$rOpts_line_up_parentheses = 0;
$rOpts_extended_line_up_parentheses = 0;
}
# fix for b1459: -naws adds stress for -xlp
if ( $high_stress_level <= 2 && !$rOpts_add_whitespace ) {
$rOpts_extended_line_up_parentheses = 0;
}
$rLP = [];
# initialize the leading whitespace stack to negative levels
# so that we can never run off the end of the stack
$rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
$rLP->[$max_lp_stack]->[_lp_level_] = -1;
$rLP->[$max_lp_stack]->[_lp_object_] = undef;
$rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
$rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
return;
} ## end sub initialize_lp_vars
# hashes for efficient testing
my %hash_test1;
my %hash_test2;
my %hash_test3;
BEGIN {
my @q = qw< } ) ] >;
@hash_test1{@q} = (1) x scalar(@q);
@q = qw(: ? f);
push @q, ',';
@hash_test2{@q} = (1) x scalar(@q);
@q = qw( . || && );
@hash_test3{@q} = (1) x scalar(@q);
} ## end BEGIN
# shared variables, re-initialized for each batch
my $rlp_object_list;
my $max_lp_object_list;
my %lp_comma_count;
my %lp_arrow_count;
my $space_count;
my $current_level;
my $current_ci_level;
my $ii_begin_line;
my $in_lp_mode;
my $stack_changed;
my $K_last_nonblank;
my $last_nonblank_token;
my $last_nonblank_type;
my $last_last_nonblank_type;
sub set_lp_indentation {
my ($self) = @_;
#------------------------------------------------------------------
# Define the leading whitespace for all tokens in the current batch
# when the -lp formatting is selected.
#------------------------------------------------------------------
# Returns number of tokens in this batch which have leading spaces
# defined by an lp object:
my $lp_object_count_this_batch = 0;
# Safety check, should not be needed:
if ( !$rOpts_line_up_parentheses
|| !defined($max_index_to_go)
|| $max_index_to_go < 0 )
{
return $lp_object_count_this_batch;
}
# List of -lp indentation objects created in this batch
$rlp_object_list = [];
$max_lp_object_list = -1;
%lp_comma_count = ();
%lp_arrow_count = ();
$space_count = undef;
$current_level = undef;
$current_ci_level = undef;
$ii_begin_line = 0;
$in_lp_mode = 0;
$stack_changed = 1;
$K_last_nonblank = undef;
$last_nonblank_token = EMPTY_STRING;
$last_nonblank_type = EMPTY_STRING;
$last_last_nonblank_type = EMPTY_STRING;
my %last_lp_equals = ();
my $rLL = $self->[_rLL_];
my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
my $imin = 0;
# The 'starting_in_quote' flag means that the first token is the first
# token of a line and it is also the continuation of some kind of
# multi-line quote or pattern. It must have no added leading
# whitespace, so we can skip it.
if ($starting_in_quote) {
$imin += 1;
}
my $Kpnb = $K_to_go[0] - 1;
if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
$Kpnb -= 1;
}
if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
$K_last_nonblank = $Kpnb;
}
if ( defined($K_last_nonblank) ) {
$last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
$last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
}
#-----------------------------------
# Loop over all tokens in this batch
#-----------------------------------
foreach my $ii ( $imin .. $max_index_to_go ) {
my $type = $types_to_go[$ii];
my $token = $tokens_to_go[$ii];
my $level = $levels_to_go[$ii];
my $ci_level = $ci_levels_to_go[$ii];
my $total_depth = $nesting_depth_to_go[$ii];
# get the top state from the stack if it has changed
if ($stack_changed) {
my $rLP_top = $rLP->[$max_lp_stack];
my $lp_object = $rLP_top->[_lp_object_];
if ($lp_object) {
( $space_count, $current_level, $current_ci_level ) =
@{ $lp_object->get_spaces_level_ci() };
}
else {
$current_ci_level = $rLP_top->[_lp_ci_level_];
$current_level = $rLP_top->[_lp_level_];
$space_count = $rLP_top->[_lp_space_count_];
}
$stack_changed = 0;
}
#------------------------------------------------------------
# Break at a previous '=' if necessary to control line length
#------------------------------------------------------------
if ( $type eq '{' || $type eq '(' ) {
$lp_comma_count{ $total_depth + 1 } = 0;
$lp_arrow_count{ $total_depth + 1 } = 0;
# If we come to an opening token after an '=' token of some
# type, see if it would be helpful to 'break' after the '=' to
# save space
my $ii_last_equals = $last_lp_equals{$total_depth};
if ($ii_last_equals) {
$self->lp_equals_break_check( $ii, $ii_last_equals );
}
}
#------------------------
# Handle decreasing depth
#------------------------
# Note that one token may have both decreasing and then increasing
# depth. For example, (level, ci) can go from (1,1) to (2,0). So,
# in this example we would first go back to (1,0) then up to (2,0)
# in a single call.
if ( $level < $current_level || $ci_level < $current_ci_level ) {
$self->lp_decreasing_depth($ii);
}
#------------------------
# handle increasing depth
#------------------------
if ( $level > $current_level || $ci_level > $current_ci_level ) {
$self->lp_increasing_depth($ii);
}
#------------------
# Handle all tokens
#------------------
if ( $type ne 'b' ) {
# Count commas and look for non-list characters. Once we see a
# non-list character, we give up and don't look for any more
# commas.
if ( $type eq '=>' ) {
$lp_arrow_count{$total_depth}++;
# remember '=>' like '=' for estimating breaks (but see
# above note for b1035)
$last_lp_equals{$total_depth} = $ii;
}
elsif ( $type eq ',' ) {
$lp_comma_count{$total_depth}++;
}
elsif ( $is_assignment{$type} ) {
$last_lp_equals{$total_depth} = $ii;
}
else {
## not a special type
}
# this token might start a new line if ..
if (
$ii > $ii_begin_line
&& (
# this is the first nonblank token of the line
$ii == 1 && $types_to_go[0] eq 'b'
# or previous character was one of these:
# /^([\:\?\,f])$/
|| $hash_test2{$last_nonblank_type}
# or previous character was opening and this is not
# closing
|| ( $last_nonblank_type eq '{' && $type ne '}' )
|| ( $last_nonblank_type eq '(' and $type ne ')' )
# or this token is one of these:
# /^([\.]|\|\||\&\&)$/
|| $hash_test3{$type}
# or this is a closing structure
|| ( $last_nonblank_type eq '}'
&& $last_nonblank_token eq $last_nonblank_type )
# or previous token was keyword 'return'
|| (
$last_nonblank_type eq 'k'
&& ( $last_nonblank_token eq 'return'
&& $type ne '{' )
)
# or starting a new line at certain keywords is fine
|| ( $type eq 'k'
&& $is_if_unless_and_or_last_next_redo_return{
$token} )
# or this is after an assignment after a closing
# structure
|| (
$is_assignment{$last_nonblank_type}
&& (
# /^[\}\)\]]$/
$hash_test1{$last_last_nonblank_type}
# and it is significantly to the right
|| $lp_position_predictor > (
$maximum_line_length_at_level[$level] -
$rOpts_maximum_line_length / 2